linux/kernel/bpf/helpers.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3 */
   4#include <linux/bpf.h>
   5#include <linux/rcupdate.h>
   6#include <linux/random.h>
   7#include <linux/smp.h>
   8#include <linux/topology.h>
   9#include <linux/ktime.h>
  10#include <linux/sched.h>
  11#include <linux/uidgid.h>
  12#include <linux/filter.h>
  13#include <linux/ctype.h>
  14#include <linux/jiffies.h>
  15#include <linux/pid_namespace.h>
  16#include <linux/proc_ns.h>
  17#include <linux/security.h>
  18
  19#include "../../lib/kstrtox.h"
  20
   21/* If a kernel subsystem allows eBPF programs to call this function, its
   22 * verifier_ops->get_func_proto() callback should return
   23 * bpf_map_lookup_elem_proto, so that the verifier can properly check the arguments.
   24 *
   25 * Different map implementations rely on rcu in their lookup/update/delete
   26 * map methods, therefore eBPF programs must run under an rcu lock if the
   27 * program is allowed to access maps, so check rcu_read_lock_held() in
   28 * all three functions.
   29 */
  30BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
  31{
  32        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  33        return (unsigned long) map->ops->map_lookup_elem(map, key);
  34}
  35
  36const struct bpf_func_proto bpf_map_lookup_elem_proto = {
  37        .func           = bpf_map_lookup_elem,
  38        .gpl_only       = false,
  39        .pkt_access     = true,
  40        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
  41        .arg1_type      = ARG_CONST_MAP_PTR,
  42        .arg2_type      = ARG_PTR_TO_MAP_KEY,
  43};
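
/* Illustrative sketch, not part of the original file: a subsystem that allows
 * these helpers would typically return the protos above from its
 * verifier_ops->get_func_proto() callback. The callback name below is a
 * hypothetical example:
 *
 *	static const struct bpf_func_proto *
 *	demo_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		case BPF_FUNC_map_update_elem:
 *			return &bpf_map_update_elem_proto;
 *		case BPF_FUNC_map_delete_elem:
 *			return &bpf_map_delete_elem_proto;
 *		default:
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */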
  44
  45BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
  46           void *, value, u64, flags)
  47{
  48        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  49        return map->ops->map_update_elem(map, key, value, flags);
  50}
  51
  52const struct bpf_func_proto bpf_map_update_elem_proto = {
  53        .func           = bpf_map_update_elem,
  54        .gpl_only       = false,
  55        .pkt_access     = true,
  56        .ret_type       = RET_INTEGER,
  57        .arg1_type      = ARG_CONST_MAP_PTR,
  58        .arg2_type      = ARG_PTR_TO_MAP_KEY,
  59        .arg3_type      = ARG_PTR_TO_MAP_VALUE,
  60        .arg4_type      = ARG_ANYTHING,
  61};
  62
  63BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
  64{
  65        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  66        return map->ops->map_delete_elem(map, key);
  67}
  68
  69const struct bpf_func_proto bpf_map_delete_elem_proto = {
  70        .func           = bpf_map_delete_elem,
  71        .gpl_only       = false,
  72        .pkt_access     = true,
  73        .ret_type       = RET_INTEGER,
  74        .arg1_type      = ARG_CONST_MAP_PTR,
  75        .arg2_type      = ARG_PTR_TO_MAP_KEY,
  76};
  77
  78BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
  79{
  80        return map->ops->map_push_elem(map, value, flags);
  81}
  82
  83const struct bpf_func_proto bpf_map_push_elem_proto = {
  84        .func           = bpf_map_push_elem,
  85        .gpl_only       = false,
  86        .pkt_access     = true,
  87        .ret_type       = RET_INTEGER,
  88        .arg1_type      = ARG_CONST_MAP_PTR,
  89        .arg2_type      = ARG_PTR_TO_MAP_VALUE,
  90        .arg3_type      = ARG_ANYTHING,
  91};
  92
  93BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
  94{
  95        return map->ops->map_pop_elem(map, value);
  96}
  97
  98const struct bpf_func_proto bpf_map_pop_elem_proto = {
  99        .func           = bpf_map_pop_elem,
 100        .gpl_only       = false,
 101        .ret_type       = RET_INTEGER,
 102        .arg1_type      = ARG_CONST_MAP_PTR,
 103        .arg2_type      = ARG_PTR_TO_UNINIT_MAP_VALUE,
 104};
 105
 106BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
 107{
 108        return map->ops->map_peek_elem(map, value);
 109}
 110
 111const struct bpf_func_proto bpf_map_peek_elem_proto = {
 112        .func           = bpf_map_peek_elem,
 113        .gpl_only       = false,
 114        .ret_type       = RET_INTEGER,
 115        .arg1_type      = ARG_CONST_MAP_PTR,
 116        .arg2_type      = ARG_PTR_TO_UNINIT_MAP_VALUE,
 117};
 118
 119const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 120        .func           = bpf_user_rnd_u32,
 121        .gpl_only       = false,
 122        .ret_type       = RET_INTEGER,
 123};
 124
 125BPF_CALL_0(bpf_get_smp_processor_id)
 126{
 127        return smp_processor_id();
 128}
 129
 130const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
 131        .func           = bpf_get_smp_processor_id,
 132        .gpl_only       = false,
 133        .ret_type       = RET_INTEGER,
 134};
 135
 136BPF_CALL_0(bpf_get_numa_node_id)
 137{
 138        return numa_node_id();
 139}
 140
 141const struct bpf_func_proto bpf_get_numa_node_id_proto = {
 142        .func           = bpf_get_numa_node_id,
 143        .gpl_only       = false,
 144        .ret_type       = RET_INTEGER,
 145};
 146
 147BPF_CALL_0(bpf_ktime_get_ns)
 148{
 149        /* NMI safe access to clock monotonic */
 150        return ktime_get_mono_fast_ns();
 151}
 152
 153const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 154        .func           = bpf_ktime_get_ns,
 155        .gpl_only       = false,
 156        .ret_type       = RET_INTEGER,
 157};
 158
 159BPF_CALL_0(bpf_ktime_get_boot_ns)
 160{
 161        /* NMI safe access to clock boottime */
 162        return ktime_get_boot_fast_ns();
 163}
 164
 165const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
 166        .func           = bpf_ktime_get_boot_ns,
 167        .gpl_only       = false,
 168        .ret_type       = RET_INTEGER,
 169};
 170
 171BPF_CALL_0(bpf_ktime_get_coarse_ns)
 172{
 173        return ktime_get_coarse_ns();
 174}
 175
 176const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
 177        .func           = bpf_ktime_get_coarse_ns,
 178        .gpl_only       = false,
 179        .ret_type       = RET_INTEGER,
 180};
 181
 182BPF_CALL_0(bpf_get_current_pid_tgid)
 183{
 184        struct task_struct *task = current;
 185
 186        if (unlikely(!task))
 187                return -EINVAL;
 188
 189        return (u64) task->tgid << 32 | task->pid;
 190}
 191
 192const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
 193        .func           = bpf_get_current_pid_tgid,
 194        .gpl_only       = false,
 195        .ret_type       = RET_INTEGER,
 196};
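
/* Sketch of typical BPF-program-side usage, an assumption for illustration:
 * the helper packs tgid (the process id as seen by user space) in the upper
 * 32 bits and pid (the thread id) in the lower 32 bits.
 *
 *	u64 id = bpf_get_current_pid_tgid();
 *	u32 tgid = id >> 32;	// process (thread group) id
 *	u32 tid = (u32)id;	// thread id
 */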
 197
 198BPF_CALL_0(bpf_get_current_uid_gid)
 199{
 200        struct task_struct *task = current;
 201        kuid_t uid;
 202        kgid_t gid;
 203
 204        if (unlikely(!task))
 205                return -EINVAL;
 206
 207        current_uid_gid(&uid, &gid);
 208        return (u64) from_kgid(&init_user_ns, gid) << 32 |
 209                     from_kuid(&init_user_ns, uid);
 210}
 211
 212const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
 213        .func           = bpf_get_current_uid_gid,
 214        .gpl_only       = false,
 215        .ret_type       = RET_INTEGER,
 216};
 217
 218BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
 219{
 220        struct task_struct *task = current;
 221
 222        if (unlikely(!task))
 223                goto err_clear;
 224
 225        strncpy(buf, task->comm, size);
 226
 227        /* Verifier guarantees that size > 0. For task->comm exceeding
 228         * size, guarantee that buf is %NUL-terminated. Unconditionally
 229         * done here to save the size test.
 230         */
 231        buf[size - 1] = 0;
 232        return 0;
 233err_clear:
 234        memset(buf, 0, size);
 235        return -EINVAL;
 236}
 237
 238const struct bpf_func_proto bpf_get_current_comm_proto = {
 239        .func           = bpf_get_current_comm,
 240        .gpl_only       = false,
 241        .ret_type       = RET_INTEGER,
 242        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
 243        .arg2_type      = ARG_CONST_SIZE,
 244};
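
/* Sketch of typical BPF-program-side usage, an assumption for illustration:
 *
 *	char comm[16];		// TASK_COMM_LEN
 *
 *	if (bpf_get_current_comm(comm, sizeof(comm)))
 *		return 0;	// buf was zeroed on failure
 */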
 245
 246#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
 247
 248static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
 249{
 250        arch_spinlock_t *l = (void *)lock;
 251        union {
 252                __u32 val;
 253                arch_spinlock_t lock;
 254        } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
 255
 256        compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
 257        BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
 258        BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
 259        arch_spin_lock(l);
 260}
 261
 262static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
 263{
 264        arch_spinlock_t *l = (void *)lock;
 265
 266        arch_spin_unlock(l);
 267}
 268
 269#else
 270
 271static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
 272{
 273        atomic_t *l = (void *)lock;
 274
 275        BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
 276        do {
 277                atomic_cond_read_relaxed(l, !VAL);
 278        } while (atomic_xchg(l, 1));
 279}
 280
 281static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
 282{
 283        atomic_t *l = (void *)lock;
 284
 285        atomic_set_release(l, 0);
 286}
 287
 288#endif
 289
 290static DEFINE_PER_CPU(unsigned long, irqsave_flags);
 291
 292static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
 293{
 294        unsigned long flags;
 295
 296        local_irq_save(flags);
 297        __bpf_spin_lock(lock);
 298        __this_cpu_write(irqsave_flags, flags);
 299}
 300
 301notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
 302{
 303        __bpf_spin_lock_irqsave(lock);
 304        return 0;
 305}
 306
 307const struct bpf_func_proto bpf_spin_lock_proto = {
 308        .func           = bpf_spin_lock,
 309        .gpl_only       = false,
 310        .ret_type       = RET_VOID,
 311        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
 312};
 313
 314static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
 315{
 316        unsigned long flags;
 317
 318        flags = __this_cpu_read(irqsave_flags);
 319        __bpf_spin_unlock(lock);
 320        local_irq_restore(flags);
 321}
 322
 323notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
 324{
 325        __bpf_spin_unlock_irqrestore(lock);
 326        return 0;
 327}
 328
 329const struct bpf_func_proto bpf_spin_unlock_proto = {
 330        .func           = bpf_spin_unlock,
 331        .gpl_only       = false,
 332        .ret_type       = RET_VOID,
 333        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
 334};
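
/* Sketch of BPF-program-side usage, an assumption for illustration: a map
 * value embeds a struct bpf_spin_lock and the program brackets updates of the
 * remaining fields with the lock/unlock helpers. Map and field names are
 * hypothetical:
 *
 *	struct hmap_elem {
 *		int cnt;
 *		struct bpf_spin_lock lock;
 *	};
 *
 *	struct hmap_elem *val = bpf_map_lookup_elem(&hmap, &key);
 *	if (val) {
 *		bpf_spin_lock(&val->lock);
 *		val->cnt++;
 *		bpf_spin_unlock(&val->lock);
 *	}
 */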
 335
 336void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 337                           bool lock_src)
 338{
 339        struct bpf_spin_lock *lock;
 340
 341        if (lock_src)
 342                lock = src + map->spin_lock_off;
 343        else
 344                lock = dst + map->spin_lock_off;
 345        preempt_disable();
 346        __bpf_spin_lock_irqsave(lock);
 347        copy_map_value(map, dst, src);
 348        __bpf_spin_unlock_irqrestore(lock);
 349        preempt_enable();
 350}
 351
 352BPF_CALL_0(bpf_jiffies64)
 353{
 354        return get_jiffies_64();
 355}
 356
 357const struct bpf_func_proto bpf_jiffies64_proto = {
 358        .func           = bpf_jiffies64,
 359        .gpl_only       = false,
 360        .ret_type       = RET_INTEGER,
 361};
 362
 363#ifdef CONFIG_CGROUPS
 364BPF_CALL_0(bpf_get_current_cgroup_id)
 365{
 366        struct cgroup *cgrp;
 367        u64 cgrp_id;
 368
 369        rcu_read_lock();
 370        cgrp = task_dfl_cgroup(current);
 371        cgrp_id = cgroup_id(cgrp);
 372        rcu_read_unlock();
 373
 374        return cgrp_id;
 375}
 376
 377const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 378        .func           = bpf_get_current_cgroup_id,
 379        .gpl_only       = false,
 380        .ret_type       = RET_INTEGER,
 381};
 382
 383BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
 384{
 385        struct cgroup *cgrp;
 386        struct cgroup *ancestor;
 387        u64 cgrp_id;
 388
 389        rcu_read_lock();
 390        cgrp = task_dfl_cgroup(current);
 391        ancestor = cgroup_ancestor(cgrp, ancestor_level);
 392        cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
 393        rcu_read_unlock();
 394
 395        return cgrp_id;
 396}
 397
 398const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 399        .func           = bpf_get_current_ancestor_cgroup_id,
 400        .gpl_only       = false,
 401        .ret_type       = RET_INTEGER,
 402        .arg1_type      = ARG_ANYTHING,
 403};
 404
 405#ifdef CONFIG_CGROUP_BPF
 406
 407BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 408{
  409        /* The flags argument is not used now,
  410         * but provides the ability to extend the API.
  411         * The verifier checks that its value is correct.
  412         */
 413        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
 414        struct bpf_cgroup_storage *storage;
 415        struct bpf_cg_run_ctx *ctx;
 416        void *ptr;
 417
 418        /* get current cgroup storage from BPF run context */
 419        ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
 420        storage = ctx->prog_item->cgroup_storage[stype];
 421
 422        if (stype == BPF_CGROUP_STORAGE_SHARED)
 423                ptr = &READ_ONCE(storage->buf)->data[0];
 424        else
 425                ptr = this_cpu_ptr(storage->percpu_buf);
 426
 427        return (unsigned long)ptr;
 428}
 429
 430const struct bpf_func_proto bpf_get_local_storage_proto = {
 431        .func           = bpf_get_local_storage,
 432        .gpl_only       = false,
 433        .ret_type       = RET_PTR_TO_MAP_VALUE,
 434        .arg1_type      = ARG_CONST_MAP_PTR,
 435        .arg2_type      = ARG_ANYTHING,
 436};
 437#endif
 438
 439#define BPF_STRTOX_BASE_MASK 0x1F
 440
 441static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
 442                          unsigned long long *res, bool *is_negative)
 443{
 444        unsigned int base = flags & BPF_STRTOX_BASE_MASK;
 445        const char *cur_buf = buf;
 446        size_t cur_len = buf_len;
 447        unsigned int consumed;
 448        size_t val_len;
 449        char str[64];
 450
 451        if (!buf || !buf_len || !res || !is_negative)
 452                return -EINVAL;
 453
 454        if (base != 0 && base != 8 && base != 10 && base != 16)
 455                return -EINVAL;
 456
 457        if (flags & ~BPF_STRTOX_BASE_MASK)
 458                return -EINVAL;
 459
 460        while (cur_buf < buf + buf_len && isspace(*cur_buf))
 461                ++cur_buf;
 462
 463        *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
 464        if (*is_negative)
 465                ++cur_buf;
 466
 467        consumed = cur_buf - buf;
 468        cur_len -= consumed;
 469        if (!cur_len)
 470                return -EINVAL;
 471
 472        cur_len = min(cur_len, sizeof(str) - 1);
 473        memcpy(str, cur_buf, cur_len);
 474        str[cur_len] = '\0';
 475        cur_buf = str;
 476
 477        cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
 478        val_len = _parse_integer(cur_buf, base, res);
 479
 480        if (val_len & KSTRTOX_OVERFLOW)
 481                return -ERANGE;
 482
 483        if (val_len == 0)
 484                return -EINVAL;
 485
 486        cur_buf += val_len;
 487        consumed += cur_buf - str;
 488
 489        return consumed;
 490}
 491
 492static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
 493                         long long *res)
 494{
 495        unsigned long long _res;
 496        bool is_negative;
 497        int err;
 498
 499        err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
 500        if (err < 0)
 501                return err;
 502        if (is_negative) {
 503                if ((long long)-_res > 0)
 504                        return -ERANGE;
 505                *res = -_res;
 506        } else {
 507                if ((long long)_res < 0)
 508                        return -ERANGE;
 509                *res = _res;
 510        }
 511        return err;
 512}
 513
 514BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
 515           long *, res)
 516{
 517        long long _res;
 518        int err;
 519
 520        err = __bpf_strtoll(buf, buf_len, flags, &_res);
 521        if (err < 0)
 522                return err;
 523        if (_res != (long)_res)
 524                return -ERANGE;
 525        *res = _res;
 526        return err;
 527}
 528
 529const struct bpf_func_proto bpf_strtol_proto = {
 530        .func           = bpf_strtol,
 531        .gpl_only       = false,
 532        .ret_type       = RET_INTEGER,
 533        .arg1_type      = ARG_PTR_TO_MEM,
 534        .arg2_type      = ARG_CONST_SIZE,
 535        .arg3_type      = ARG_ANYTHING,
 536        .arg4_type      = ARG_PTR_TO_LONG,
 537};
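
/* Sketch of typical usage, an assumption for illustration: the low 5 bits of
 * flags select the numeric base (0, 8, 10 or 16, with 0 meaning
 * auto-detection) and the return value is the number of characters consumed:
 *
 *	long val;
 *	int ret = bpf_strtol(buf, buf_len, 0, &val);
 *	if (ret < 0)
 *		return ret;	// -EINVAL or -ERANGE
 */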
 538
 539BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
 540           unsigned long *, res)
 541{
 542        unsigned long long _res;
 543        bool is_negative;
 544        int err;
 545
 546        err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
 547        if (err < 0)
 548                return err;
 549        if (is_negative)
 550                return -EINVAL;
 551        if (_res != (unsigned long)_res)
 552                return -ERANGE;
 553        *res = _res;
 554        return err;
 555}
 556
 557const struct bpf_func_proto bpf_strtoul_proto = {
 558        .func           = bpf_strtoul,
 559        .gpl_only       = false,
 560        .ret_type       = RET_INTEGER,
 561        .arg1_type      = ARG_PTR_TO_MEM,
 562        .arg2_type      = ARG_CONST_SIZE,
 563        .arg3_type      = ARG_ANYTHING,
 564        .arg4_type      = ARG_PTR_TO_LONG,
 565};
 566#endif
 567
 568BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
 569           struct bpf_pidns_info *, nsdata, u32, size)
 570{
 571        struct task_struct *task = current;
 572        struct pid_namespace *pidns;
 573        int err = -EINVAL;
 574
 575        if (unlikely(size != sizeof(struct bpf_pidns_info)))
 576                goto clear;
 577
 578        if (unlikely((u64)(dev_t)dev != dev))
 579                goto clear;
 580
 581        if (unlikely(!task))
 582                goto clear;
 583
 584        pidns = task_active_pid_ns(task);
 585        if (unlikely(!pidns)) {
 586                err = -ENOENT;
 587                goto clear;
 588        }
 589
 590        if (!ns_match(&pidns->ns, (dev_t)dev, ino))
 591                goto clear;
 592
 593        nsdata->pid = task_pid_nr_ns(task, pidns);
 594        nsdata->tgid = task_tgid_nr_ns(task, pidns);
 595        return 0;
 596clear:
 597        memset((void *)nsdata, 0, (size_t) size);
 598        return err;
 599}
 600
 601const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
 602        .func           = bpf_get_ns_current_pid_tgid,
 603        .gpl_only       = false,
 604        .ret_type       = RET_INTEGER,
 605        .arg1_type      = ARG_ANYTHING,
 606        .arg2_type      = ARG_ANYTHING,
 607        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
 608        .arg4_type      = ARG_CONST_SIZE,
 609};
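
/* Sketch of typical usage, an assumption for illustration: user space stat()s
 * the pid namespace of interest, e.g. /proc/self/ns/pid, and hands st_dev and
 * st_ino to the program, which then resolves namespace-local ids:
 *
 *	struct bpf_pidns_info ns;
 *
 *	if (!bpf_get_ns_current_pid_tgid(dev, ino, &ns, sizeof(ns)))
 *		// use ns.pid and ns.tgid
 */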
 610
 611static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
 612        .func           = bpf_get_raw_cpu_id,
 613        .gpl_only       = false,
 614        .ret_type       = RET_INTEGER,
 615};
 616
 617BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
 618           u64, flags, void *, data, u64, size)
 619{
 620        if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 621                return -EINVAL;
 622
 623        return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
 624}
 625
 626const struct bpf_func_proto bpf_event_output_data_proto =  {
 627        .func           = bpf_event_output_data,
 628        .gpl_only       = true,
 629        .ret_type       = RET_INTEGER,
 630        .arg1_type      = ARG_PTR_TO_CTX,
 631        .arg2_type      = ARG_CONST_MAP_PTR,
 632        .arg3_type      = ARG_ANYTHING,
 633        .arg4_type      = ARG_PTR_TO_MEM,
 634        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 635};
 636
 637BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
 638           const void __user *, user_ptr)
 639{
 640        int ret = copy_from_user(dst, user_ptr, size);
 641
 642        if (unlikely(ret)) {
 643                memset(dst, 0, size);
 644                ret = -EFAULT;
 645        }
 646
 647        return ret;
 648}
 649
 650const struct bpf_func_proto bpf_copy_from_user_proto = {
 651        .func           = bpf_copy_from_user,
 652        .gpl_only       = false,
 653        .ret_type       = RET_INTEGER,
 654        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
 655        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
 656        .arg3_type      = ARG_ANYTHING,
 657};
 658
 659BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 660{
 661        if (cpu >= nr_cpu_ids)
 662                return (unsigned long)NULL;
 663
 664        return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
 665}
 666
 667const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
 668        .func           = bpf_per_cpu_ptr,
 669        .gpl_only       = false,
 670        .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
 671        .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
 672        .arg2_type      = ARG_ANYTHING,
 673};
 674
 675BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
 676{
 677        return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
 678}
 679
 680const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
 681        .func           = bpf_this_cpu_ptr,
 682        .gpl_only       = false,
 683        .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID,
 684        .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
 685};
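
/* Sketch of BPF-program-side usage, an assumption for illustration: both
 * helpers take a pointer to a per-cpu kernel symbol declared with __ksym:
 *
 *	extern const struct rq runqueues __ksym;
 *
 *	const struct rq *rq = bpf_per_cpu_ptr(&runqueues, cpu);
 *	if (rq)			// NULL if cpu >= nr_cpu_ids
 *		// read rq fields for that cpu
 *
 *	const struct rq *this_rq = bpf_this_cpu_ptr(&runqueues);
 *				// current cpu's copy, never NULL
 */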
 686
 687static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 688                size_t bufsz)
 689{
 690        void __user *user_ptr = (__force void __user *)unsafe_ptr;
 691
 692        buf[0] = 0;
 693
 694        switch (fmt_ptype) {
 695        case 's':
 696#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 697                if ((unsigned long)unsafe_ptr < TASK_SIZE)
 698                        return strncpy_from_user_nofault(buf, user_ptr, bufsz);
 699                fallthrough;
 700#endif
 701        case 'k':
 702                return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
 703        case 'u':
 704                return strncpy_from_user_nofault(buf, user_ptr, bufsz);
 705        }
 706
 707        return -EINVAL;
 708}
 709
 710/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
 711 * arguments representation.
 712 */
 713#define MAX_BPRINTF_BUF_LEN     512
 714
 715/* Support executing three nested bprintf helper calls on a given CPU */
 716#define MAX_BPRINTF_NEST_LEVEL  3
 717struct bpf_bprintf_buffers {
 718        char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
 719};
 720static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
 721static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
 722
 723static int try_get_fmt_tmp_buf(char **tmp_buf)
 724{
 725        struct bpf_bprintf_buffers *bufs;
 726        int nest_level;
 727
 728        preempt_disable();
 729        nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
 730        if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
 731                this_cpu_dec(bpf_bprintf_nest_level);
 732                preempt_enable();
 733                return -EBUSY;
 734        }
 735        bufs = this_cpu_ptr(&bpf_bprintf_bufs);
 736        *tmp_buf = bufs->tmp_bufs[nest_level - 1];
 737
 738        return 0;
 739}
 740
 741void bpf_bprintf_cleanup(void)
 742{
 743        if (this_cpu_read(bpf_bprintf_nest_level)) {
 744                this_cpu_dec(bpf_bprintf_nest_level);
 745                preempt_enable();
 746        }
 747}
 748
 749/*
 750 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
 751 *
 752 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
 753 *
 754 * This can be used in two ways:
 755 * - Format string verification only: when bin_args is NULL
 756 * - Arguments preparation: in addition to the above verification, it writes in
 757 *   bin_args a binary representation of arguments usable by bstr_printf where
 758 *   pointers from BPF have been sanitized.
 759 *
 760 * In argument preparation mode, if 0 is returned, safe temporary buffers are
 761 * allocated and bpf_bprintf_cleanup should be called to free them after use.
 762 */
 763int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 764                        u32 **bin_args, u32 num_args)
 765{
 766        char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
 767        size_t sizeof_cur_arg, sizeof_cur_ip;
 768        int err, i, num_spec = 0;
 769        u64 cur_arg;
 770        char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
 771
 772        fmt_end = strnchr(fmt, fmt_size, 0);
 773        if (!fmt_end)
 774                return -EINVAL;
 775        fmt_size = fmt_end - fmt;
 776
 777        if (bin_args) {
 778                if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
 779                        return -EBUSY;
 780
 781                tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
 782                *bin_args = (u32 *)tmp_buf;
 783        }
 784
 785        for (i = 0; i < fmt_size; i++) {
 786                if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
 787                        err = -EINVAL;
 788                        goto out;
 789                }
 790
 791                if (fmt[i] != '%')
 792                        continue;
 793
 794                if (fmt[i + 1] == '%') {
 795                        i++;
 796                        continue;
 797                }
 798
 799                if (num_spec >= num_args) {
 800                        err = -EINVAL;
 801                        goto out;
 802                }
 803
 804                /* The string is zero-terminated so if fmt[i] != 0, we can
 805                 * always access fmt[i + 1], in the worst case it will be a 0
 806                 */
 807                i++;
 808
 809                /* skip optional "[0 +-][num]" width formatting field */
 810                while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
 811                       fmt[i] == ' ')
 812                        i++;
 813                if (fmt[i] >= '1' && fmt[i] <= '9') {
 814                        i++;
 815                        while (fmt[i] >= '0' && fmt[i] <= '9')
 816                                i++;
 817                }
 818
 819                if (fmt[i] == 'p') {
 820                        sizeof_cur_arg = sizeof(long);
 821
 822                        if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
 823                            fmt[i + 2] == 's') {
 824                                fmt_ptype = fmt[i + 1];
 825                                i += 2;
 826                                goto fmt_str;
 827                        }
 828
 829                        if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
 830                            ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
 831                            fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
 832                            fmt[i + 1] == 'S') {
 833                                /* just kernel pointers */
 834                                if (tmp_buf)
 835                                        cur_arg = raw_args[num_spec];
 836                                i++;
 837                                goto nocopy_fmt;
 838                        }
 839
 840                        if (fmt[i + 1] == 'B') {
 841                                if (tmp_buf)  {
 842                                        err = snprintf(tmp_buf,
 843                                                       (tmp_buf_end - tmp_buf),
 844                                                       "%pB",
 845                                                       (void *)(long)raw_args[num_spec]);
 846                                        tmp_buf += (err + 1);
 847                                }
 848
 849                                i++;
 850                                num_spec++;
 851                                continue;
 852                        }
 853
 854                        /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
 855                        if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
 856                            (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
 857                                err = -EINVAL;
 858                                goto out;
 859                        }
 860
 861                        i += 2;
 862                        if (!tmp_buf)
 863                                goto nocopy_fmt;
 864
 865                        sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
 866                        if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
 867                                err = -ENOSPC;
 868                                goto out;
 869                        }
 870
 871                        unsafe_ptr = (char *)(long)raw_args[num_spec];
 872                        err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
 873                                                       sizeof_cur_ip);
 874                        if (err < 0)
 875                                memset(cur_ip, 0, sizeof_cur_ip);
 876
 877                        /* hack: bstr_printf expects IP addresses to be
  878                         * pre-formatted as strings; ironically, the easiest way
 879                         * to do that is to call snprintf.
 880                         */
 881                        ip_spec[2] = fmt[i - 1];
 882                        ip_spec[3] = fmt[i];
 883                        err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
 884                                       ip_spec, &cur_ip);
 885
 886                        tmp_buf += err + 1;
 887                        num_spec++;
 888
 889                        continue;
 890                } else if (fmt[i] == 's') {
 891                        fmt_ptype = fmt[i];
 892fmt_str:
 893                        if (fmt[i + 1] != 0 &&
 894                            !isspace(fmt[i + 1]) &&
 895                            !ispunct(fmt[i + 1])) {
 896                                err = -EINVAL;
 897                                goto out;
 898                        }
 899
 900                        if (!tmp_buf)
 901                                goto nocopy_fmt;
 902
 903                        if (tmp_buf_end == tmp_buf) {
 904                                err = -ENOSPC;
 905                                goto out;
 906                        }
 907
 908                        unsafe_ptr = (char *)(long)raw_args[num_spec];
 909                        err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
 910                                                    fmt_ptype,
 911                                                    tmp_buf_end - tmp_buf);
 912                        if (err < 0) {
 913                                tmp_buf[0] = '\0';
 914                                err = 1;
 915                        }
 916
 917                        tmp_buf += err;
 918                        num_spec++;
 919
 920                        continue;
 921                } else if (fmt[i] == 'c') {
 922                        if (!tmp_buf)
 923                                goto nocopy_fmt;
 924
 925                        if (tmp_buf_end == tmp_buf) {
 926                                err = -ENOSPC;
 927                                goto out;
 928                        }
 929
 930                        *tmp_buf = raw_args[num_spec];
 931                        tmp_buf++;
 932                        num_spec++;
 933
 934                        continue;
 935                }
 936
 937                sizeof_cur_arg = sizeof(int);
 938
 939                if (fmt[i] == 'l') {
 940                        sizeof_cur_arg = sizeof(long);
 941                        i++;
 942                }
 943                if (fmt[i] == 'l') {
 944                        sizeof_cur_arg = sizeof(long long);
 945                        i++;
 946                }
 947
 948                if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
 949                    fmt[i] != 'x' && fmt[i] != 'X') {
 950                        err = -EINVAL;
 951                        goto out;
 952                }
 953
 954                if (tmp_buf)
 955                        cur_arg = raw_args[num_spec];
 956nocopy_fmt:
 957                if (tmp_buf) {
 958                        tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
 959                        if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
 960                                err = -ENOSPC;
 961                                goto out;
 962                        }
 963
 964                        if (sizeof_cur_arg == 8) {
 965                                *(u32 *)tmp_buf = *(u32 *)&cur_arg;
 966                                *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
 967                        } else {
 968                                *(u32 *)tmp_buf = (u32)(long)cur_arg;
 969                        }
 970                        tmp_buf += sizeof_cur_arg;
 971                }
 972                num_spec++;
 973        }
 974
 975        err = 0;
 976out:
 977        if (err)
 978                bpf_bprintf_cleanup();
 979        return err;
 980}
 981
 982BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
 983           const void *, data, u32, data_len)
 984{
 985        int err, num_args;
 986        u32 *bin_args;
 987
 988        if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
 989            (data_len && !data))
 990                return -EINVAL;
 991        num_args = data_len / 8;
 992
 993        /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
 994         * can safely give an unbounded size.
 995         */
 996        err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
 997        if (err < 0)
 998                return err;
 999
1000        err = bstr_printf(str, str_size, fmt, bin_args);
1001
1002        bpf_bprintf_cleanup();
1003
1004        return err + 1;
1005}
1006
1007const struct bpf_func_proto bpf_snprintf_proto = {
1008        .func           = bpf_snprintf,
1009        .gpl_only       = true,
1010        .ret_type       = RET_INTEGER,
1011        .arg1_type      = ARG_PTR_TO_MEM_OR_NULL,
1012        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
1013        .arg3_type      = ARG_PTR_TO_CONST_STR,
1014        .arg4_type      = ARG_PTR_TO_MEM_OR_NULL,
1015        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
1016};
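
/* Sketch of BPF-program-side usage, an assumption for illustration: data is
 * an array of u64 holding one entry per conversion specifier, data_len is its
 * size in bytes, and the return value counts the bytes that would have been
 * written including the trailing NUL:
 *
 *	u64 args[] = { (u64)pid, (u64)addr };
 *	char out[64];
 *
 *	int len = bpf_snprintf(out, sizeof(out), "pid %d addr %lx",
 *			       args, sizeof(args));
 */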
1017
1018/* BPF map elements can contain 'struct bpf_timer'.
1019 * Such map owns all of its BPF timers.
1020 * 'struct bpf_timer' is allocated as part of map element allocation
1021 * and it's zero initialized.
1022 * That space is used to keep 'struct bpf_timer_kern'.
1023 * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
1024 * remembers 'struct bpf_map *' pointer it's part of.
 1025 * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn.
 1026 * bpf_timer_start() arms the timer.
 1027 * If the user space reference to a map goes to zero at this point,
 1028 * the ops->map_release_uref callback is responsible for cancelling the timers,
 1029 * freeing their memory, and decrementing prog's refcnts.
 1030 * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
 1031 * Inner maps can contain bpf timers as well. ops->map_release_uref
 1032 * frees the timers when an inner map is replaced or deleted by user space.
1033 */
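
/* Sketch of BPF-program-side usage, an assumption for illustration; map,
 * struct and callback names are hypothetical:
 *
 *	struct map_elem {
 *		int counter;
 *		struct bpf_timer timer;
 *	};
 *
 *	static int timer_cb(void *map, int *key, struct map_elem *val)
 *	{
 *		// do work and then either re-arm or let the timer expire
 *		return 0;
 *	}
 *
 *	struct map_elem *val = bpf_map_lookup_elem(&hmap, &key);
 *	if (val) {
 *		bpf_timer_init(&val->timer, &hmap, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&val->timer, timer_cb);
 *		bpf_timer_start(&val->timer, 1000000, 0);	// fire in 1 msec
 *	}
 */
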
1034struct bpf_hrtimer {
1035        struct hrtimer timer;
1036        struct bpf_map *map;
1037        struct bpf_prog *prog;
1038        void __rcu *callback_fn;
1039        void *value;
1040};
1041
1042/* the actual struct hidden inside uapi struct bpf_timer */
1043struct bpf_timer_kern {
1044        struct bpf_hrtimer *timer;
1045        /* bpf_spin_lock is used here instead of spinlock_t to make
 1046         * sure that it always fits into space reserved by struct bpf_timer
1047         * regardless of LOCKDEP and spinlock debug flags.
1048         */
1049        struct bpf_spin_lock lock;
1050} __attribute__((aligned(8)));
1051
1052static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1053
1054static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1055{
1056        struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1057        struct bpf_map *map = t->map;
1058        void *value = t->value;
1059        bpf_callback_t callback_fn;
1060        void *key;
1061        u32 idx;
1062
1063        callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
1064        if (!callback_fn)
1065                goto out;
1066
1067        /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1068         * cannot be preempted by another bpf_timer_cb() on the same cpu.
1069         * Remember the timer this callback is servicing to prevent
1070         * deadlock if callback_fn() calls bpf_timer_cancel() or
1071         * bpf_map_delete_elem() on the same timer.
1072         */
1073        this_cpu_write(hrtimer_running, t);
1074        if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1075                struct bpf_array *array = container_of(map, struct bpf_array, map);
1076
1077                /* compute the key */
1078                idx = ((char *)value - array->value) / array->elem_size;
1079                key = &idx;
1080        } else { /* hash or lru */
1081                key = value - round_up(map->key_size, 8);
1082        }
1083
1084        callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
1085        /* The verifier checked that return value is zero. */
1086
1087        this_cpu_write(hrtimer_running, NULL);
1088out:
1089        return HRTIMER_NORESTART;
1090}
1091
1092BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
1093           u64, flags)
1094{
1095        clockid_t clockid = flags & (MAX_CLOCKS - 1);
1096        struct bpf_hrtimer *t;
1097        int ret = 0;
1098
1099        BUILD_BUG_ON(MAX_CLOCKS != 16);
1100        BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
1101        BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
1102
1103        if (in_nmi())
1104                return -EOPNOTSUPP;
1105
1106        if (flags >= MAX_CLOCKS ||
1107            /* similar to timerfd except _ALARM variants are not supported */
1108            (clockid != CLOCK_MONOTONIC &&
1109             clockid != CLOCK_REALTIME &&
1110             clockid != CLOCK_BOOTTIME))
1111                return -EINVAL;
1112        __bpf_spin_lock_irqsave(&timer->lock);
1113        t = timer->timer;
1114        if (t) {
1115                ret = -EBUSY;
1116                goto out;
1117        }
1118        if (!atomic64_read(&map->usercnt)) {
1119                /* maps with timers must be either held by user space
1120                 * or pinned in bpffs.
1121                 */
1122                ret = -EPERM;
1123                goto out;
1124        }
1125        /* allocate hrtimer via map_kmalloc to use memcg accounting */
1126        t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
1127        if (!t) {
1128                ret = -ENOMEM;
1129                goto out;
1130        }
1131        t->value = (void *)timer - map->timer_off;
1132        t->map = map;
1133        t->prog = NULL;
1134        rcu_assign_pointer(t->callback_fn, NULL);
1135        hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1136        t->timer.function = bpf_timer_cb;
1137        timer->timer = t;
1138out:
1139        __bpf_spin_unlock_irqrestore(&timer->lock);
1140        return ret;
1141}
1142
1143static const struct bpf_func_proto bpf_timer_init_proto = {
1144        .func           = bpf_timer_init,
1145        .gpl_only       = true,
1146        .ret_type       = RET_INTEGER,
1147        .arg1_type      = ARG_PTR_TO_TIMER,
1148        .arg2_type      = ARG_CONST_MAP_PTR,
1149        .arg3_type      = ARG_ANYTHING,
1150};
1151
1152BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
1153           struct bpf_prog_aux *, aux)
1154{
1155        struct bpf_prog *prev, *prog = aux->prog;
1156        struct bpf_hrtimer *t;
1157        int ret = 0;
1158
1159        if (in_nmi())
1160                return -EOPNOTSUPP;
1161        __bpf_spin_lock_irqsave(&timer->lock);
1162        t = timer->timer;
1163        if (!t) {
1164                ret = -EINVAL;
1165                goto out;
1166        }
1167        if (!atomic64_read(&t->map->usercnt)) {
1168                /* maps with timers must be either held by user space
1169                 * or pinned in bpffs. Otherwise timer might still be
1170                 * running even when bpf prog is detached and user space
1171                 * is gone, since map_release_uref won't ever be called.
1172                 */
1173                ret = -EPERM;
1174                goto out;
1175        }
1176        prev = t->prog;
1177        if (prev != prog) {
1178                /* Bump prog refcnt once. Every bpf_timer_set_callback()
1179                 * can pick different callback_fn-s within the same prog.
1180                 */
1181                prog = bpf_prog_inc_not_zero(prog);
1182                if (IS_ERR(prog)) {
1183                        ret = PTR_ERR(prog);
1184                        goto out;
1185                }
1186                if (prev)
1187                        /* Drop prev prog refcnt when swapping with new prog */
1188                        bpf_prog_put(prev);
1189                t->prog = prog;
1190        }
1191        rcu_assign_pointer(t->callback_fn, callback_fn);
1192out:
1193        __bpf_spin_unlock_irqrestore(&timer->lock);
1194        return ret;
1195}
1196
1197static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1198        .func           = bpf_timer_set_callback,
1199        .gpl_only       = true,
1200        .ret_type       = RET_INTEGER,
1201        .arg1_type      = ARG_PTR_TO_TIMER,
1202        .arg2_type      = ARG_PTR_TO_FUNC,
1203};
1204
1205BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
1206{
1207        struct bpf_hrtimer *t;
1208        int ret = 0;
1209
1210        if (in_nmi())
1211                return -EOPNOTSUPP;
1212        if (flags)
1213                return -EINVAL;
1214        __bpf_spin_lock_irqsave(&timer->lock);
1215        t = timer->timer;
1216        if (!t || !t->prog) {
1217                ret = -EINVAL;
1218                goto out;
1219        }
1220        hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
1221out:
1222        __bpf_spin_unlock_irqrestore(&timer->lock);
1223        return ret;
1224}
1225
1226static const struct bpf_func_proto bpf_timer_start_proto = {
1227        .func           = bpf_timer_start,
1228        .gpl_only       = true,
1229        .ret_type       = RET_INTEGER,
1230        .arg1_type      = ARG_PTR_TO_TIMER,
1231        .arg2_type      = ARG_ANYTHING,
1232        .arg3_type      = ARG_ANYTHING,
1233};
1234
1235static void drop_prog_refcnt(struct bpf_hrtimer *t)
1236{
1237        struct bpf_prog *prog = t->prog;
1238
1239        if (prog) {
1240                bpf_prog_put(prog);
1241                t->prog = NULL;
1242                rcu_assign_pointer(t->callback_fn, NULL);
1243        }
1244}
1245
1246BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
1247{
1248        struct bpf_hrtimer *t;
1249        int ret = 0;
1250
1251        if (in_nmi())
1252                return -EOPNOTSUPP;
1253        __bpf_spin_lock_irqsave(&timer->lock);
1254        t = timer->timer;
1255        if (!t) {
1256                ret = -EINVAL;
1257                goto out;
1258        }
1259        if (this_cpu_read(hrtimer_running) == t) {
1260                /* If bpf callback_fn is trying to bpf_timer_cancel()
1261                 * its own timer the hrtimer_cancel() will deadlock
1262                 * since it waits for callback_fn to finish
1263                 */
1264                ret = -EDEADLK;
1265                goto out;
1266        }
1267        drop_prog_refcnt(t);
1268out:
1269        __bpf_spin_unlock_irqrestore(&timer->lock);
1270        /* Cancel the timer and wait for associated callback to finish
1271         * if it was running.
1272         */
1273        ret = ret ?: hrtimer_cancel(&t->timer);
1274        return ret;
1275}
1276
1277static const struct bpf_func_proto bpf_timer_cancel_proto = {
1278        .func           = bpf_timer_cancel,
1279        .gpl_only       = true,
1280        .ret_type       = RET_INTEGER,
1281        .arg1_type      = ARG_PTR_TO_TIMER,
1282};
1283
 1284/* This function is called by map_delete/update_elem for an individual element and
1285 * by ops->map_release_uref when the user space reference to a map reaches zero.
1286 */
1287void bpf_timer_cancel_and_free(void *val)
1288{
1289        struct bpf_timer_kern *timer = val;
1290        struct bpf_hrtimer *t;
1291
1292        /* Performance optimization: read timer->timer without lock first. */
1293        if (!READ_ONCE(timer->timer))
1294                return;
1295
1296        __bpf_spin_lock_irqsave(&timer->lock);
1297        /* re-read it under lock */
1298        t = timer->timer;
1299        if (!t)
1300                goto out;
1301        drop_prog_refcnt(t);
1302        /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1303         * this timer, since it won't be initialized.
1304         */
1305        timer->timer = NULL;
1306out:
1307        __bpf_spin_unlock_irqrestore(&timer->lock);
1308        if (!t)
1309                return;
1310        /* Cancel the timer and wait for callback to complete if it was running.
1311         * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1312         * right after for both preallocated and non-preallocated maps.
1313         * The timer->timer = NULL was already done and no code path can
1314         * see address 't' anymore.
1315         *
1316         * Check that bpf_map_delete/update_elem() wasn't called from timer
1317         * callback_fn. In such case don't call hrtimer_cancel() (since it will
1318         * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1319         * return -1). Though callback_fn is still running on this cpu it's
1320         * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1321         * from 't'. The bpf subprog callback_fn won't be able to access 't',
1322         * since timer->timer = NULL was already done. The timer will be
1323         * effectively cancelled because bpf_timer_cb() will return
1324         * HRTIMER_NORESTART.
1325         */
1326        if (this_cpu_read(hrtimer_running) != t)
1327                hrtimer_cancel(&t->timer);
1328        kfree(t);
1329}
1330
1331const struct bpf_func_proto bpf_get_current_task_proto __weak;
1332const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1333const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1334const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1335const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1336const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1337const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1338
1339const struct bpf_func_proto *
1340bpf_base_func_proto(enum bpf_func_id func_id)
1341{
1342        switch (func_id) {
1343        case BPF_FUNC_map_lookup_elem:
1344                return &bpf_map_lookup_elem_proto;
1345        case BPF_FUNC_map_update_elem:
1346                return &bpf_map_update_elem_proto;
1347        case BPF_FUNC_map_delete_elem:
1348                return &bpf_map_delete_elem_proto;
1349        case BPF_FUNC_map_push_elem:
1350                return &bpf_map_push_elem_proto;
1351        case BPF_FUNC_map_pop_elem:
1352                return &bpf_map_pop_elem_proto;
1353        case BPF_FUNC_map_peek_elem:
1354                return &bpf_map_peek_elem_proto;
1355        case BPF_FUNC_get_prandom_u32:
1356                return &bpf_get_prandom_u32_proto;
1357        case BPF_FUNC_get_smp_processor_id:
1358                return &bpf_get_raw_smp_processor_id_proto;
1359        case BPF_FUNC_get_numa_node_id:
1360                return &bpf_get_numa_node_id_proto;
1361        case BPF_FUNC_tail_call:
1362                return &bpf_tail_call_proto;
1363        case BPF_FUNC_ktime_get_ns:
1364                return &bpf_ktime_get_ns_proto;
1365        case BPF_FUNC_ktime_get_boot_ns:
1366                return &bpf_ktime_get_boot_ns_proto;
1367        case BPF_FUNC_ringbuf_output:
1368                return &bpf_ringbuf_output_proto;
1369        case BPF_FUNC_ringbuf_reserve:
1370                return &bpf_ringbuf_reserve_proto;
1371        case BPF_FUNC_ringbuf_submit:
1372                return &bpf_ringbuf_submit_proto;
1373        case BPF_FUNC_ringbuf_discard:
1374                return &bpf_ringbuf_discard_proto;
1375        case BPF_FUNC_ringbuf_query:
1376                return &bpf_ringbuf_query_proto;
1377        case BPF_FUNC_for_each_map_elem:
1378                return &bpf_for_each_map_elem_proto;
1379        default:
1380                break;
1381        }
1382
1383        if (!bpf_capable())
1384                return NULL;
1385
1386        switch (func_id) {
1387        case BPF_FUNC_spin_lock:
1388                return &bpf_spin_lock_proto;
1389        case BPF_FUNC_spin_unlock:
1390                return &bpf_spin_unlock_proto;
1391        case BPF_FUNC_jiffies64:
1392                return &bpf_jiffies64_proto;
1393        case BPF_FUNC_per_cpu_ptr:
1394                return &bpf_per_cpu_ptr_proto;
1395        case BPF_FUNC_this_cpu_ptr:
1396                return &bpf_this_cpu_ptr_proto;
1397        case BPF_FUNC_timer_init:
1398                return &bpf_timer_init_proto;
1399        case BPF_FUNC_timer_set_callback:
1400                return &bpf_timer_set_callback_proto;
1401        case BPF_FUNC_timer_start:
1402                return &bpf_timer_start_proto;
1403        case BPF_FUNC_timer_cancel:
1404                return &bpf_timer_cancel_proto;
1405        default:
1406                break;
1407        }
1408
1409        if (!perfmon_capable())
1410                return NULL;
1411
1412        switch (func_id) {
1413        case BPF_FUNC_trace_printk:
1414                return bpf_get_trace_printk_proto();
1415        case BPF_FUNC_get_current_task:
1416                return &bpf_get_current_task_proto;
1417        case BPF_FUNC_get_current_task_btf:
1418                return &bpf_get_current_task_btf_proto;
1419        case BPF_FUNC_probe_read_user:
1420                return &bpf_probe_read_user_proto;
1421        case BPF_FUNC_probe_read_kernel:
1422                return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1423                       NULL : &bpf_probe_read_kernel_proto;
1424        case BPF_FUNC_probe_read_user_str:
1425                return &bpf_probe_read_user_str_proto;
1426        case BPF_FUNC_probe_read_kernel_str:
1427                return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1428                       NULL : &bpf_probe_read_kernel_str_proto;
1429        case BPF_FUNC_snprintf_btf:
1430                return &bpf_snprintf_btf_proto;
1431        case BPF_FUNC_snprintf:
1432                return &bpf_snprintf_proto;
1433        case BPF_FUNC_task_pt_regs:
1434                return &bpf_task_pt_regs_proto;
1435        case BPF_FUNC_trace_vprintk:
1436                return bpf_get_trace_vprintk_proto();
1437        default:
1438                return NULL;
1439        }
1440}
1441