linux/net/core/filter.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Linux Socket Filter - Kernel level socket filtering
   4 *
   5 * Based on the design of the Berkeley Packet Filter. The new
   6 * internal format has been designed by PLUMgrid:
   7 *
   8 *      Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
   9 *
  10 * Authors:
  11 *
  12 *      Jay Schulist <jschlst@samba.org>
  13 *      Alexei Starovoitov <ast@plumgrid.com>
  14 *      Daniel Borkmann <dborkman@redhat.com>
  15 *
  16 * Andi Kleen - Fix a few bad bugs and races.
  17 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  18 */
  19
  20#include <linux/atomic.h>
  21#include <linux/module.h>
  22#include <linux/types.h>
  23#include <linux/mm.h>
  24#include <linux/fcntl.h>
  25#include <linux/socket.h>
  26#include <linux/sock_diag.h>
  27#include <linux/in.h>
  28#include <linux/inet.h>
  29#include <linux/netdevice.h>
  30#include <linux/if_packet.h>
  31#include <linux/if_arp.h>
  32#include <linux/gfp.h>
  33#include <net/inet_common.h>
  34#include <net/ip.h>
  35#include <net/protocol.h>
  36#include <net/netlink.h>
  37#include <linux/skbuff.h>
  38#include <linux/skmsg.h>
  39#include <net/sock.h>
  40#include <net/flow_dissector.h>
  41#include <linux/errno.h>
  42#include <linux/timer.h>
  43#include <linux/uaccess.h>
  44#include <asm/unaligned.h>
  45#include <linux/filter.h>
  46#include <linux/ratelimit.h>
  47#include <linux/seccomp.h>
  48#include <linux/if_vlan.h>
  49#include <linux/bpf.h>
  50#include <linux/btf.h>
  51#include <net/sch_generic.h>
  52#include <net/cls_cgroup.h>
  53#include <net/dst_metadata.h>
  54#include <net/dst.h>
  55#include <net/sock_reuseport.h>
  56#include <net/busy_poll.h>
  57#include <net/tcp.h>
  58#include <net/xfrm.h>
  59#include <net/udp.h>
  60#include <linux/bpf_trace.h>
  61#include <net/xdp_sock.h>
  62#include <linux/inetdevice.h>
  63#include <net/inet_hashtables.h>
  64#include <net/inet6_hashtables.h>
  65#include <net/ip_fib.h>
  66#include <net/nexthop.h>
  67#include <net/flow.h>
  68#include <net/arp.h>
  69#include <net/ipv6.h>
  70#include <net/net_namespace.h>
  71#include <linux/seg6_local.h>
  72#include <net/seg6.h>
  73#include <net/seg6_local.h>
  74#include <net/lwtunnel.h>
  75#include <net/ipv6_stubs.h>
  76#include <net/bpf_sk_storage.h>
  77#include <net/transp_v6.h>
  78#include <linux/btf_ids.h>
  79#include <net/tls.h>
  80#include <net/xdp.h>
  81
  82static const struct bpf_func_proto *
  83bpf_sk_base_func_proto(enum bpf_func_id func_id);
  84
  85int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
  86{
  87        if (in_compat_syscall()) {
  88                struct compat_sock_fprog f32;
  89
  90                if (len != sizeof(f32))
  91                        return -EINVAL;
  92                if (copy_from_sockptr(&f32, src, sizeof(f32)))
  93                        return -EFAULT;
  94                memset(dst, 0, sizeof(*dst));
  95                dst->len = f32.len;
  96                dst->filter = compat_ptr(f32.filter);
  97        } else {
  98                if (len != sizeof(*dst))
  99                        return -EINVAL;
 100                if (copy_from_sockptr(dst, src, sizeof(*dst)))
 101                        return -EFAULT;
 102        }
 103
 104        return 0;
 105}
 106EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
 107
 108/**
 109 *      sk_filter_trim_cap - run a packet through a socket filter
 110 *      @sk: sock associated with &sk_buff
 111 *      @skb: buffer to filter
 112 *      @cap: limit on how short the eBPF program may trim the packet
 113 *
 114 * Run the eBPF program and then cut skb->data to correct size returned by
 115 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 116 * than pkt_len we keep whole skb->data. This is the socket level
 117 * wrapper to bpf_prog_run. It returns 0 if the packet should
 118 * be accepted or -EPERM if the packet should be tossed.
 119 *
 120 */
 121int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 122{
 123        int err;
 124        struct sk_filter *filter;
 125
 126        /*
 127         * If the skb was allocated from pfmemalloc reserves, only
 128         * allow SOCK_MEMALLOC sockets to use it as this socket is
 129         * helping free memory
 130         */
 131        if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
 132                NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
 133                return -ENOMEM;
 134        }
 135        err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
 136        if (err)
 137                return err;
 138
 139        err = security_sock_rcv_skb(sk, skb);
 140        if (err)
 141                return err;
 142
 143        rcu_read_lock();
 144        filter = rcu_dereference(sk->sk_filter);
 145        if (filter) {
 146                struct sock *save_sk = skb->sk;
 147                unsigned int pkt_len;
 148
 149                skb->sk = sk;
 150                pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
 151                skb->sk = save_sk;
 152                err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
 153        }
 154        rcu_read_unlock();
 155
 156        return err;
 157}
 158EXPORT_SYMBOL(sk_filter_trim_cap);
 159
 160BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
 161{
 162        return skb_get_poff(skb);
 163}
 164
 165BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
 166{
 167        struct nlattr *nla;
 168
 169        if (skb_is_nonlinear(skb))
 170                return 0;
 171
 172        if (skb->len < sizeof(struct nlattr))
 173                return 0;
 174
 175        if (a > skb->len - sizeof(struct nlattr))
 176                return 0;
 177
 178        nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
 179        if (nla)
 180                return (void *) nla - (void *) skb->data;
 181
 182        return 0;
 183}
 184
 185BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
 186{
 187        struct nlattr *nla;
 188
 189        if (skb_is_nonlinear(skb))
 190                return 0;
 191
 192        if (skb->len < sizeof(struct nlattr))
 193                return 0;
 194
 195        if (a > skb->len - sizeof(struct nlattr))
 196                return 0;
 197
 198        nla = (struct nlattr *) &skb->data[a];
 199        if (nla->nla_len > skb->len - a)
 200                return 0;
 201
 202        nla = nla_find_nested(nla, x);
 203        if (nla)
 204                return (void *) nla - (void *) skb->data;
 205
 206        return 0;
 207}
 208
 209BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
 210           data, int, headlen, int, offset)
 211{
 212        u8 tmp, *ptr;
 213        const int len = sizeof(tmp);
 214
 215        if (offset >= 0) {
 216                if (headlen - offset >= len)
 217                        return *(u8 *)(data + offset);
 218                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
 219                        return tmp;
 220        } else {
 221                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
 222                if (likely(ptr))
 223                        return *(u8 *)ptr;
 224        }
 225
 226        return -EFAULT;
 227}
 228
 229BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
 230           int, offset)
 231{
 232        return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
 233                                         offset);
 234}
 235
 236BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
 237           data, int, headlen, int, offset)
 238{
 239        u16 tmp, *ptr;
 240        const int len = sizeof(tmp);
 241
 242        if (offset >= 0) {
 243                if (headlen - offset >= len)
 244                        return get_unaligned_be16(data + offset);
 245                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
 246                        return be16_to_cpu(tmp);
 247        } else {
 248                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
 249                if (likely(ptr))
 250                        return get_unaligned_be16(ptr);
 251        }
 252
 253        return -EFAULT;
 254}
 255
 256BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
 257           int, offset)
 258{
 259        return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
 260                                          offset);
 261}
 262
 263BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
 264           data, int, headlen, int, offset)
 265{
 266        u32 tmp, *ptr;
 267        const int len = sizeof(tmp);
 268
 269        if (likely(offset >= 0)) {
 270                if (headlen - offset >= len)
 271                        return get_unaligned_be32(data + offset);
 272                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
 273                        return be32_to_cpu(tmp);
 274        } else {
 275                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
 276                if (likely(ptr))
 277                        return get_unaligned_be32(ptr);
 278        }
 279
 280        return -EFAULT;
 281}
 282
 283BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
 284           int, offset)
 285{
 286        return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
 287                                          offset);
 288}
 289
 290static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
 291                              struct bpf_insn *insn_buf)
 292{
 293        struct bpf_insn *insn = insn_buf;
 294
 295        switch (skb_field) {
 296        case SKF_AD_MARK:
 297                BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
 298
 299                *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
 300                                      offsetof(struct sk_buff, mark));
 301                break;
 302
 303        case SKF_AD_PKTTYPE:
 304                *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
 305                *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
 306#ifdef __BIG_ENDIAN_BITFIELD
 307                *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
 308#endif
 309                break;
 310
 311        case SKF_AD_QUEUE:
 312                BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);
 313
 314                *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
 315                                      offsetof(struct sk_buff, queue_mapping));
 316                break;
 317
 318        case SKF_AD_VLAN_TAG:
 319                BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
 320
 321                /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
 322                *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
 323                                      offsetof(struct sk_buff, vlan_tci));
 324                break;
 325        case SKF_AD_VLAN_TAG_PRESENT:
 326                *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
 327                if (PKT_VLAN_PRESENT_BIT)
 328                        *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
 329                if (PKT_VLAN_PRESENT_BIT < 7)
 330                        *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
 331                break;
 332        }
 333
 334        return insn - insn_buf;
 335}
 336
 337static bool convert_bpf_extensions(struct sock_filter *fp,
 338                                   struct bpf_insn **insnp)
 339{
 340        struct bpf_insn *insn = *insnp;
 341        u32 cnt;
 342
 343        switch (fp->k) {
 344        case SKF_AD_OFF + SKF_AD_PROTOCOL:
 345                BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);
 346
 347                /* A = *(u16 *) (CTX + offsetof(protocol)) */
 348                *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
 349                                      offsetof(struct sk_buff, protocol));
 350                /* A = ntohs(A) [emitting a nop or swap16] */
 351                *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
 352                break;
 353
 354        case SKF_AD_OFF + SKF_AD_PKTTYPE:
 355                cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
 356                insn += cnt - 1;
 357                break;
 358
 359        case SKF_AD_OFF + SKF_AD_IFINDEX:
 360        case SKF_AD_OFF + SKF_AD_HATYPE:
 361                BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
 362                BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
 363
 364                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 365                                      BPF_REG_TMP, BPF_REG_CTX,
 366                                      offsetof(struct sk_buff, dev));
 367                /* if (tmp != 0) goto pc + 1 */
 368                *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
 369                *insn++ = BPF_EXIT_INSN();
 370                if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
 371                        *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
 372                                            offsetof(struct net_device, ifindex));
 373                else
 374                        *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
 375                                            offsetof(struct net_device, type));
 376                break;
 377
 378        case SKF_AD_OFF + SKF_AD_MARK:
 379                cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
 380                insn += cnt - 1;
 381                break;
 382
 383        case SKF_AD_OFF + SKF_AD_RXHASH:
 384                BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
 385
 386                *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
 387                                    offsetof(struct sk_buff, hash));
 388                break;
 389
 390        case SKF_AD_OFF + SKF_AD_QUEUE:
 391                cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
 392                insn += cnt - 1;
 393                break;
 394
 395        case SKF_AD_OFF + SKF_AD_VLAN_TAG:
 396                cnt = convert_skb_access(SKF_AD_VLAN_TAG,
 397                                         BPF_REG_A, BPF_REG_CTX, insn);
 398                insn += cnt - 1;
 399                break;
 400
 401        case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
 402                cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
 403                                         BPF_REG_A, BPF_REG_CTX, insn);
 404                insn += cnt - 1;
 405                break;
 406
 407        case SKF_AD_OFF + SKF_AD_VLAN_TPID:
 408                BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);
 409
 410                /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
 411                *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
 412                                      offsetof(struct sk_buff, vlan_proto));
 413                /* A = ntohs(A) [emitting a nop or swap16] */
 414                *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
 415                break;
 416
 417        case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
 418        case SKF_AD_OFF + SKF_AD_NLATTR:
 419        case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 420        case SKF_AD_OFF + SKF_AD_CPU:
 421        case SKF_AD_OFF + SKF_AD_RANDOM:
 422                /* arg1 = CTX */
 423                *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 424                /* arg2 = A */
 425                *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
 426                /* arg3 = X */
 427                *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
 428                /* Emit call(arg1=CTX, arg2=A, arg3=X) */
 429                switch (fp->k) {
 430                case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
 431                        *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
 432                        break;
 433                case SKF_AD_OFF + SKF_AD_NLATTR:
 434                        *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
 435                        break;
 436                case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 437                        *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
 438                        break;
 439                case SKF_AD_OFF + SKF_AD_CPU:
 440                        *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
 441                        break;
 442                case SKF_AD_OFF + SKF_AD_RANDOM:
 443                        *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
 444                        bpf_user_rnd_init_once();
 445                        break;
 446                }
 447                break;
 448
 449        case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
 450                /* A ^= X */
 451                *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
 452                break;
 453
 454        default:
 455                /* This is just a dummy call to avoid letting the compiler
 456                 * evict __bpf_call_base() as an optimization. Placed here
 457                 * where no-one bothers.
 458                 */
 459                BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
 460                return false;
 461        }
 462
 463        *insnp = insn;
 464        return true;
 465}
 466
 467static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
 468{
 469        const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
 470        int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
 471        bool endian = BPF_SIZE(fp->code) == BPF_H ||
 472                      BPF_SIZE(fp->code) == BPF_W;
 473        bool indirect = BPF_MODE(fp->code) == BPF_IND;
 474        const int ip_align = NET_IP_ALIGN;
 475        struct bpf_insn *insn = *insnp;
 476        int offset = fp->k;
 477
 478        if (!indirect &&
 479            ((unaligned_ok && offset >= 0) ||
 480             (!unaligned_ok && offset >= 0 &&
 481              offset + ip_align >= 0 &&
 482              offset + ip_align % size == 0))) {
 483                bool ldx_off_ok = offset <= S16_MAX;
 484
 485                *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
 486                if (offset)
 487                        *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
 488                *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
 489                                      size, 2 + endian + (!ldx_off_ok * 2));
 490                if (ldx_off_ok) {
 491                        *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
 492                                              BPF_REG_D, offset);
 493                } else {
 494                        *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
 495                        *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
 496                        *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
 497                                              BPF_REG_TMP, 0);
 498                }
 499                if (endian)
 500                        *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
 501                *insn++ = BPF_JMP_A(8);
 502        }
 503
 504        *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 505        *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
 506        *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
 507        if (!indirect) {
 508                *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
 509        } else {
 510                *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
 511                if (fp->k)
 512                        *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
 513        }
 514
 515        switch (BPF_SIZE(fp->code)) {
 516        case BPF_B:
 517                *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
 518                break;
 519        case BPF_H:
 520                *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
 521                break;
 522        case BPF_W:
 523                *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
 524                break;
 525        default:
 526                return false;
 527        }
 528
 529        *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
 530        *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
 531        *insn   = BPF_EXIT_INSN();
 532
 533        *insnp = insn;
 534        return true;
 535}
 536
 537/**
 538 *      bpf_convert_filter - convert filter program
 539 *      @prog: the user passed filter program
 540 *      @len: the length of the user passed filter program
 541 *      @new_prog: allocated 'struct bpf_prog' or NULL
 542 *      @new_len: pointer to store length of converted program
 543 *      @seen_ld_abs: bool whether we've seen ld_abs/ind
 544 *
 545 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 546 * style extended BPF (eBPF).
 547 * Conversion workflow:
 548 *
 549 * 1) First pass for calculating the new program length:
 550 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 551 *
 552 * 2) 2nd pass to remap in two passes: 1st pass finds new
 553 *    jump offsets, 2nd pass remapping:
 554 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 555 */
 556static int bpf_convert_filter(struct sock_filter *prog, int len,
 557                              struct bpf_prog *new_prog, int *new_len,
 558                              bool *seen_ld_abs)
 559{
 560        int new_flen = 0, pass = 0, target, i, stack_off;
 561        struct bpf_insn *new_insn, *first_insn = NULL;
 562        struct sock_filter *fp;
 563        int *addrs = NULL;
 564        u8 bpf_src;
 565
 566        BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
 567        BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
 568
 569        if (len <= 0 || len > BPF_MAXINSNS)
 570                return -EINVAL;
 571
 572        if (new_prog) {
 573                first_insn = new_prog->insnsi;
 574                addrs = kcalloc(len, sizeof(*addrs),
 575                                GFP_KERNEL | __GFP_NOWARN);
 576                if (!addrs)
 577                        return -ENOMEM;
 578        }
 579
 580do_pass:
 581        new_insn = first_insn;
 582        fp = prog;
 583
 584        /* Classic BPF related prologue emission. */
 585        if (new_prog) {
 586                /* Classic BPF expects A and X to be reset first. These need
 587                 * to be guaranteed to be the first two instructions.
 588                 */
 589                *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
 590                *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
 591
 592                /* All programs must keep CTX in callee saved BPF_REG_CTX.
 593                 * In eBPF case it's done by the compiler, here we need to
 594                 * do this ourself. Initial CTX is present in BPF_REG_ARG1.
 595                 */
 596                *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
 597                if (*seen_ld_abs) {
 598                        /* For packet access in classic BPF, cache skb->data
 599                         * in callee-saved BPF R8 and skb->len - skb->data_len
 600                         * (headlen) in BPF R9. Since classic BPF is read-only
 601                         * on CTX, we only need to cache it once.
 602                         */
 603                        *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
 604                                                  BPF_REG_D, BPF_REG_CTX,
 605                                                  offsetof(struct sk_buff, data));
 606                        *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
 607                                                  offsetof(struct sk_buff, len));
 608                        *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
 609                                                  offsetof(struct sk_buff, data_len));
 610                        *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
 611                }
 612        } else {
 613                new_insn += 3;
 614        }
 615
 616        for (i = 0; i < len; fp++, i++) {
 617                struct bpf_insn tmp_insns[32] = { };
 618                struct bpf_insn *insn = tmp_insns;
 619
 620                if (addrs)
 621                        addrs[i] = new_insn - first_insn;
 622
 623                switch (fp->code) {
 624                /* All arithmetic insns and skb loads map as-is. */
 625                case BPF_ALU | BPF_ADD | BPF_X:
 626                case BPF_ALU | BPF_ADD | BPF_K:
 627                case BPF_ALU | BPF_SUB | BPF_X:
 628                case BPF_ALU | BPF_SUB | BPF_K:
 629                case BPF_ALU | BPF_AND | BPF_X:
 630                case BPF_ALU | BPF_AND | BPF_K:
 631                case BPF_ALU | BPF_OR | BPF_X:
 632                case BPF_ALU | BPF_OR | BPF_K:
 633                case BPF_ALU | BPF_LSH | BPF_X:
 634                case BPF_ALU | BPF_LSH | BPF_K:
 635                case BPF_ALU | BPF_RSH | BPF_X:
 636                case BPF_ALU | BPF_RSH | BPF_K:
 637                case BPF_ALU | BPF_XOR | BPF_X:
 638                case BPF_ALU | BPF_XOR | BPF_K:
 639                case BPF_ALU | BPF_MUL | BPF_X:
 640                case BPF_ALU | BPF_MUL | BPF_K:
 641                case BPF_ALU | BPF_DIV | BPF_X:
 642                case BPF_ALU | BPF_DIV | BPF_K:
 643                case BPF_ALU | BPF_MOD | BPF_X:
 644                case BPF_ALU | BPF_MOD | BPF_K:
 645                case BPF_ALU | BPF_NEG:
 646                case BPF_LD | BPF_ABS | BPF_W:
 647                case BPF_LD | BPF_ABS | BPF_H:
 648                case BPF_LD | BPF_ABS | BPF_B:
 649                case BPF_LD | BPF_IND | BPF_W:
 650                case BPF_LD | BPF_IND | BPF_H:
 651                case BPF_LD | BPF_IND | BPF_B:
 652                        /* Check for overloaded BPF extension and
 653                         * directly convert it if found, otherwise
 654                         * just move on with mapping.
 655                         */
 656                        if (BPF_CLASS(fp->code) == BPF_LD &&
 657                            BPF_MODE(fp->code) == BPF_ABS &&
 658                            convert_bpf_extensions(fp, &insn))
 659                                break;
 660                        if (BPF_CLASS(fp->code) == BPF_LD &&
 661                            convert_bpf_ld_abs(fp, &insn)) {
 662                                *seen_ld_abs = true;
 663                                break;
 664                        }
 665
 666                        if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
 667                            fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
 668                                *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
 669                                /* Error with exception code on div/mod by 0.
 670                                 * For cBPF programs, this was always return 0.
 671                                 */
 672                                *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
 673                                *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
 674                                *insn++ = BPF_EXIT_INSN();
 675                        }
 676
 677                        *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 678                        break;
 679
 680                /* Jump transformation cannot use BPF block macros
 681                 * everywhere as offset calculation and target updates
 682                 * require a bit more work than the rest, i.e. jump
 683                 * opcodes map as-is, but offsets need adjustment.
 684                 */
 685
 686#define BPF_EMIT_JMP                                                    \
 687        do {                                                            \
 688                const s32 off_min = S16_MIN, off_max = S16_MAX;         \
 689                s32 off;                                                \
 690                                                                        \
 691                if (target >= len || target < 0)                        \
 692                        goto err;                                       \
 693                off = addrs ? addrs[target] - addrs[i] - 1 : 0;         \
 694                /* Adjust pc relative offset for 2nd or 3rd insn. */    \
 695                off -= insn - tmp_insns;                                \
 696                /* Reject anything not fitting into insn->off. */       \
 697                if (off < off_min || off > off_max)                     \
 698                        goto err;                                       \
 699                insn->off = off;                                        \
 700        } while (0)
 701
 702                case BPF_JMP | BPF_JA:
 703                        target = i + fp->k + 1;
 704                        insn->code = fp->code;
 705                        BPF_EMIT_JMP;
 706                        break;
 707
 708                case BPF_JMP | BPF_JEQ | BPF_K:
 709                case BPF_JMP | BPF_JEQ | BPF_X:
 710                case BPF_JMP | BPF_JSET | BPF_K:
 711                case BPF_JMP | BPF_JSET | BPF_X:
 712                case BPF_JMP | BPF_JGT | BPF_K:
 713                case BPF_JMP | BPF_JGT | BPF_X:
 714                case BPF_JMP | BPF_JGE | BPF_K:
 715                case BPF_JMP | BPF_JGE | BPF_X:
 716                        if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
 717                                /* BPF immediates are signed, zero extend
 718                                 * immediate into tmp register and use it
 719                                 * in compare insn.
 720                                 */
 721                                *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
 722
 723                                insn->dst_reg = BPF_REG_A;
 724                                insn->src_reg = BPF_REG_TMP;
 725                                bpf_src = BPF_X;
 726                        } else {
 727                                insn->dst_reg = BPF_REG_A;
 728                                insn->imm = fp->k;
 729                                bpf_src = BPF_SRC(fp->code);
 730                                insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
 731                        }
 732
 733                        /* Common case where 'jump_false' is next insn. */
 734                        if (fp->jf == 0) {
 735                                insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
 736                                target = i + fp->jt + 1;
 737                                BPF_EMIT_JMP;
 738                                break;
 739                        }
 740
 741                        /* Convert some jumps when 'jump_true' is next insn. */
 742                        if (fp->jt == 0) {
 743                                switch (BPF_OP(fp->code)) {
 744                                case BPF_JEQ:
 745                                        insn->code = BPF_JMP | BPF_JNE | bpf_src;
 746                                        break;
 747                                case BPF_JGT:
 748                                        insn->code = BPF_JMP | BPF_JLE | bpf_src;
 749                                        break;
 750                                case BPF_JGE:
 751                                        insn->code = BPF_JMP | BPF_JLT | bpf_src;
 752                                        break;
 753                                default:
 754                                        goto jmp_rest;
 755                                }
 756
 757                                target = i + fp->jf + 1;
 758                                BPF_EMIT_JMP;
 759                                break;
 760                        }
 761jmp_rest:
 762                        /* Other jumps are mapped into two insns: Jxx and JA. */
 763                        target = i + fp->jt + 1;
 764                        insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
 765                        BPF_EMIT_JMP;
 766                        insn++;
 767
 768                        insn->code = BPF_JMP | BPF_JA;
 769                        target = i + fp->jf + 1;
 770                        BPF_EMIT_JMP;
 771                        break;
 772
 773                /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 774                case BPF_LDX | BPF_MSH | BPF_B: {
 775                        struct sock_filter tmp = {
 776                                .code   = BPF_LD | BPF_ABS | BPF_B,
 777                                .k      = fp->k,
 778                        };
 779
 780                        *seen_ld_abs = true;
 781
 782                        /* X = A */
 783                        *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 784                        /* A = BPF_R0 = *(u8 *) (skb->data + K) */
 785                        convert_bpf_ld_abs(&tmp, &insn);
 786                        insn++;
 787                        /* A &= 0xf */
 788                        *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
 789                        /* A <<= 2 */
 790                        *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
 791                        /* tmp = X */
 792                        *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
 793                        /* X = A */
 794                        *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 795                        /* A = tmp */
 796                        *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
 797                        break;
 798                }
 799                /* RET_K is remaped into 2 insns. RET_A case doesn't need an
 800                 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
 801                 */
 802                case BPF_RET | BPF_A:
 803                case BPF_RET | BPF_K:
 804                        if (BPF_RVAL(fp->code) == BPF_K)
 805                                *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
 806                                                        0, fp->k);
 807                        *insn = BPF_EXIT_INSN();
 808                        break;
 809
 810                /* Store to stack. */
 811                case BPF_ST:
 812                case BPF_STX:
 813                        stack_off = fp->k * 4  + 4;
 814                        *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
 815                                            BPF_ST ? BPF_REG_A : BPF_REG_X,
 816                                            -stack_off);
 817                        /* check_load_and_stores() verifies that classic BPF can
 818                         * load from stack only after write, so tracking
 819                         * stack_depth for ST|STX insns is enough
 820                         */
 821                        if (new_prog && new_prog->aux->stack_depth < stack_off)
 822                                new_prog->aux->stack_depth = stack_off;
 823                        break;
 824
 825                /* Load from stack. */
 826                case BPF_LD | BPF_MEM:
 827                case BPF_LDX | BPF_MEM:
 828                        stack_off = fp->k * 4  + 4;
 829                        *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
 830                                            BPF_REG_A : BPF_REG_X, BPF_REG_FP,
 831                                            -stack_off);
 832                        break;
 833
 834                /* A = K or X = K */
 835                case BPF_LD | BPF_IMM:
 836                case BPF_LDX | BPF_IMM:
 837                        *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
 838                                              BPF_REG_A : BPF_REG_X, fp->k);
 839                        break;
 840
 841                /* X = A */
 842                case BPF_MISC | BPF_TAX:
 843                        *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 844                        break;
 845
 846                /* A = X */
 847                case BPF_MISC | BPF_TXA:
 848                        *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
 849                        break;
 850
 851                /* A = skb->len or X = skb->len */
 852                case BPF_LD | BPF_W | BPF_LEN:
 853                case BPF_LDX | BPF_W | BPF_LEN:
 854                        *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
 855                                            BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
 856                                            offsetof(struct sk_buff, len));
 857                        break;
 858
 859                /* Access seccomp_data fields. */
 860                case BPF_LDX | BPF_ABS | BPF_W:
 861                        /* A = *(u32 *) (ctx + K) */
 862                        *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 863                        break;
 864
 865                /* Unknown instruction. */
 866                default:
 867                        goto err;
 868                }
 869
 870                insn++;
 871                if (new_prog)
 872                        memcpy(new_insn, tmp_insns,
 873                               sizeof(*insn) * (insn - tmp_insns));
 874                new_insn += insn - tmp_insns;
 875        }
 876
 877        if (!new_prog) {
 878                /* Only calculating new length. */
 879                *new_len = new_insn - first_insn;
 880                if (*seen_ld_abs)
 881                        *new_len += 4; /* Prologue bits. */
 882                return 0;
 883        }
 884
 885        pass++;
 886        if (new_flen != new_insn - first_insn) {
 887                new_flen = new_insn - first_insn;
 888                if (pass > 2)
 889                        goto err;
 890                goto do_pass;
 891        }
 892
 893        kfree(addrs);
 894        BUG_ON(*new_len != new_flen);
 895        return 0;
 896err:
 897        kfree(addrs);
 898        return -EINVAL;
 899}
 900
 901/* Security:
 902 *
 903 * As we dont want to clear mem[] array for each packet going through
 904 * __bpf_prog_run(), we check that filter loaded by user never try to read
 905 * a cell if not previously written, and we check all branches to be sure
 906 * a malicious user doesn't try to abuse us.
 907 */
 908static int check_load_and_stores(const struct sock_filter *filter, int flen)
 909{
 910        u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
 911        int pc, ret = 0;
 912
 913        BUILD_BUG_ON(BPF_MEMWORDS > 16);
 914
 915        masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
 916        if (!masks)
 917                return -ENOMEM;
 918
 919        memset(masks, 0xff, flen * sizeof(*masks));
 920
 921        for (pc = 0; pc < flen; pc++) {
 922                memvalid &= masks[pc];
 923
 924                switch (filter[pc].code) {
 925                case BPF_ST:
 926                case BPF_STX:
 927                        memvalid |= (1 << filter[pc].k);
 928                        break;
 929                case BPF_LD | BPF_MEM:
 930                case BPF_LDX | BPF_MEM:
 931                        if (!(memvalid & (1 << filter[pc].k))) {
 932                                ret = -EINVAL;
 933                                goto error;
 934                        }
 935                        break;
 936                case BPF_JMP | BPF_JA:
 937                        /* A jump must set masks on target */
 938                        masks[pc + 1 + filter[pc].k] &= memvalid;
 939                        memvalid = ~0;
 940                        break;
 941                case BPF_JMP | BPF_JEQ | BPF_K:
 942                case BPF_JMP | BPF_JEQ | BPF_X:
 943                case BPF_JMP | BPF_JGE | BPF_K:
 944                case BPF_JMP | BPF_JGE | BPF_X:
 945                case BPF_JMP | BPF_JGT | BPF_K:
 946                case BPF_JMP | BPF_JGT | BPF_X:
 947                case BPF_JMP | BPF_JSET | BPF_K:
 948                case BPF_JMP | BPF_JSET | BPF_X:
 949                        /* A jump must set masks on targets */
 950                        masks[pc + 1 + filter[pc].jt] &= memvalid;
 951                        masks[pc + 1 + filter[pc].jf] &= memvalid;
 952                        memvalid = ~0;
 953                        break;
 954                }
 955        }
 956error:
 957        kfree(masks);
 958        return ret;
 959}
 960
 961static bool chk_code_allowed(u16 code_to_probe)
 962{
 963        static const bool codes[] = {
 964                /* 32 bit ALU operations */
 965                [BPF_ALU | BPF_ADD | BPF_K] = true,
 966                [BPF_ALU | BPF_ADD | BPF_X] = true,
 967                [BPF_ALU | BPF_SUB | BPF_K] = true,
 968                [BPF_ALU | BPF_SUB | BPF_X] = true,
 969                [BPF_ALU | BPF_MUL | BPF_K] = true,
 970                [BPF_ALU | BPF_MUL | BPF_X] = true,
 971                [BPF_ALU | BPF_DIV | BPF_K] = true,
 972                [BPF_ALU | BPF_DIV | BPF_X] = true,
 973                [BPF_ALU | BPF_MOD | BPF_K] = true,
 974                [BPF_ALU | BPF_MOD | BPF_X] = true,
 975                [BPF_ALU | BPF_AND | BPF_K] = true,
 976                [BPF_ALU | BPF_AND | BPF_X] = true,
 977                [BPF_ALU | BPF_OR | BPF_K] = true,
 978                [BPF_ALU | BPF_OR | BPF_X] = true,
 979                [BPF_ALU | BPF_XOR | BPF_K] = true,
 980                [BPF_ALU | BPF_XOR | BPF_X] = true,
 981                [BPF_ALU | BPF_LSH | BPF_K] = true,
 982                [BPF_ALU | BPF_LSH | BPF_X] = true,
 983                [BPF_ALU | BPF_RSH | BPF_K] = true,
 984                [BPF_ALU | BPF_RSH | BPF_X] = true,
 985                [BPF_ALU | BPF_NEG] = true,
 986                /* Load instructions */
 987                [BPF_LD | BPF_W | BPF_ABS] = true,
 988                [BPF_LD | BPF_H | BPF_ABS] = true,
 989                [BPF_LD | BPF_B | BPF_ABS] = true,
 990                [BPF_LD | BPF_W | BPF_LEN] = true,
 991                [BPF_LD | BPF_W | BPF_IND] = true,
 992                [BPF_LD | BPF_H | BPF_IND] = true,
 993                [BPF_LD | BPF_B | BPF_IND] = true,
 994                [BPF_LD | BPF_IMM] = true,
 995                [BPF_LD | BPF_MEM] = true,
 996                [BPF_LDX | BPF_W | BPF_LEN] = true,
 997                [BPF_LDX | BPF_B | BPF_MSH] = true,
 998                [BPF_LDX | BPF_IMM] = true,
 999                [BPF_LDX | BPF_MEM] = true,
1000                /* Store instructions */
1001                [BPF_ST] = true,
1002                [BPF_STX] = true,
1003                /* Misc instructions */
1004                [BPF_MISC | BPF_TAX] = true,
1005                [BPF_MISC | BPF_TXA] = true,
1006                /* Return instructions */
1007                [BPF_RET | BPF_K] = true,
1008                [BPF_RET | BPF_A] = true,
1009                /* Jump instructions */
1010                [BPF_JMP | BPF_JA] = true,
1011                [BPF_JMP | BPF_JEQ | BPF_K] = true,
1012                [BPF_JMP | BPF_JEQ | BPF_X] = true,
1013                [BPF_JMP | BPF_JGE | BPF_K] = true,
1014                [BPF_JMP | BPF_JGE | BPF_X] = true,
1015                [BPF_JMP | BPF_JGT | BPF_K] = true,
1016                [BPF_JMP | BPF_JGT | BPF_X] = true,
1017                [BPF_JMP | BPF_JSET | BPF_K] = true,
1018                [BPF_JMP | BPF_JSET | BPF_X] = true,
1019        };
1020
1021        if (code_to_probe >= ARRAY_SIZE(codes))
1022                return false;
1023
1024        return codes[code_to_probe];
1025}
1026
1027static bool bpf_check_basics_ok(const struct sock_filter *filter,
1028                                unsigned int flen)
1029{
1030        if (filter == NULL)
1031                return false;
1032        if (flen == 0 || flen > BPF_MAXINSNS)
1033                return false;
1034
1035        return true;
1036}
1037
1038/**
1039 *      bpf_check_classic - verify socket filter code
1040 *      @filter: filter to verify
1041 *      @flen: length of filter
1042 *
1043 * Check the user's filter code. If we let some ugly
1044 * filter code slip through kaboom! The filter must contain
1045 * no references or jumps that are out of range, no illegal
1046 * instructions, and must end with a RET instruction.
1047 *
1048 * All jumps are forward as they are not signed.
1049 *
1050 * Returns 0 if the rule set is legal or -EINVAL if not.
1051 */
1052static int bpf_check_classic(const struct sock_filter *filter,
1053                             unsigned int flen)
1054{
1055        bool anc_found;
1056        int pc;
1057
1058        /* Check the filter code now */
1059        for (pc = 0; pc < flen; pc++) {
1060                const struct sock_filter *ftest = &filter[pc];
1061
1062                /* May we actually operate on this code? */
1063                if (!chk_code_allowed(ftest->code))
1064                        return -EINVAL;
1065
1066                /* Some instructions need special checks */
1067                switch (ftest->code) {
1068                case BPF_ALU | BPF_DIV | BPF_K:
1069                case BPF_ALU | BPF_MOD | BPF_K:
1070                        /* Check for division by zero */
1071                        if (ftest->k == 0)
1072                                return -EINVAL;
1073                        break;
1074                case BPF_ALU | BPF_LSH | BPF_K:
1075                case BPF_ALU | BPF_RSH | BPF_K:
1076                        if (ftest->k >= 32)
1077                                return -EINVAL;
1078                        break;
1079                case BPF_LD | BPF_MEM:
1080                case BPF_LDX | BPF_MEM:
1081                case BPF_ST:
1082                case BPF_STX:
1083                        /* Check for invalid memory addresses */
1084                        if (ftest->k >= BPF_MEMWORDS)
1085                                return -EINVAL;
1086                        break;
1087                case BPF_JMP | BPF_JA:
1088                        /* Note, the large ftest->k might cause loops.
1089                         * Compare this with conditional jumps below,
1090                         * where offsets are limited. --ANK (981016)
1091                         */
1092                        if (ftest->k >= (unsigned int)(flen - pc - 1))
1093                                return -EINVAL;
1094                        break;
1095                case BPF_JMP | BPF_JEQ | BPF_K:
1096                case BPF_JMP | BPF_JEQ | BPF_X:
1097                case BPF_JMP | BPF_JGE | BPF_K:
1098                case BPF_JMP | BPF_JGE | BPF_X:
1099                case BPF_JMP | BPF_JGT | BPF_K:
1100                case BPF_JMP | BPF_JGT | BPF_X:
1101                case BPF_JMP | BPF_JSET | BPF_K:
1102                case BPF_JMP | BPF_JSET | BPF_X:
1103                        /* Both conditionals must be safe */
1104                        if (pc + ftest->jt + 1 >= flen ||
1105                            pc + ftest->jf + 1 >= flen)
1106                                return -EINVAL;
1107                        break;
1108                case BPF_LD | BPF_W | BPF_ABS:
1109                case BPF_LD | BPF_H | BPF_ABS:
1110                case BPF_LD | BPF_B | BPF_ABS:
1111                        anc_found = false;
1112                        if (bpf_anc_helper(ftest) & BPF_ANC)
1113                                anc_found = true;
1114                        /* Ancillary operation unknown or unsupported */
1115                        if (anc_found == false && ftest->k >= SKF_AD_OFF)
1116                                return -EINVAL;
1117                }
1118        }
1119
1120        /* Last instruction must be a RET code */
1121        switch (filter[flen - 1].code) {
1122        case BPF_RET | BPF_K:
1123        case BPF_RET | BPF_A:
1124                return check_load_and_stores(filter, flen);
1125        }
1126
1127        return -EINVAL;
1128}
1129
1130static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1131                                      const struct sock_fprog *fprog)
1132{
1133        unsigned int fsize = bpf_classic_proglen(fprog);
1134        struct sock_fprog_kern *fkprog;
1135
1136        fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1137        if (!fp->orig_prog)
1138                return -ENOMEM;
1139
1140        fkprog = fp->orig_prog;
1141        fkprog->len = fprog->len;
1142
1143        fkprog->filter = kmemdup(fp->insns, fsize,
1144                                 GFP_KERNEL | __GFP_NOWARN);
1145        if (!fkprog->filter) {
1146                kfree(fp->orig_prog);
1147                return -ENOMEM;
1148        }
1149
1150        return 0;
1151}
1152
1153static void bpf_release_orig_filter(struct bpf_prog *fp)
1154{
1155        struct sock_fprog_kern *fprog = fp->orig_prog;
1156
1157        if (fprog) {
1158                kfree(fprog->filter);
1159                kfree(fprog);
1160        }
1161}
1162
1163static void __bpf_prog_release(struct bpf_prog *prog)
1164{
1165        if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
1166                bpf_prog_put(prog);
1167        } else {
1168                bpf_release_orig_filter(prog);
1169                bpf_prog_free(prog);
1170        }
1171}
1172
1173static void __sk_filter_release(struct sk_filter *fp)
1174{
1175        __bpf_prog_release(fp->prog);
1176        kfree(fp);
1177}
1178
1179/**
1180 *      sk_filter_release_rcu - Release a socket filter by rcu_head
1181 *      @rcu: rcu_head that contains the sk_filter to free
1182 */
1183static void sk_filter_release_rcu(struct rcu_head *rcu)
1184{
1185        struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1186
1187        __sk_filter_release(fp);
1188}
1189
1190/**
1191 *      sk_filter_release - release a socket filter
1192 *      @fp: filter to remove
1193 *
1194 *      Remove a filter from a socket and release its resources.
1195 */
1196static void sk_filter_release(struct sk_filter *fp)
1197{
1198        if (refcount_dec_and_test(&fp->refcnt))
1199                call_rcu(&fp->rcu, sk_filter_release_rcu);
1200}
1201
1202void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1203{
1204        u32 filter_size = bpf_prog_size(fp->prog->len);
1205
1206        atomic_sub(filter_size, &sk->sk_omem_alloc);
1207        sk_filter_release(fp);
1208}
1209
1210/* try to charge the socket memory if there is space available
1211 * return true on success
1212 */
1213static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1214{
1215        u32 filter_size = bpf_prog_size(fp->prog->len);
1216
1217        /* same check as in sock_kmalloc() */
1218        if (filter_size <= sysctl_optmem_max &&
1219            atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
1220                atomic_add(filter_size, &sk->sk_omem_alloc);
1221                return true;
1222        }
1223        return false;
1224}
1225
1226bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1227{
1228        if (!refcount_inc_not_zero(&fp->refcnt))
1229                return false;
1230
1231        if (!__sk_filter_charge(sk, fp)) {
1232                sk_filter_release(fp);
1233                return false;
1234        }
1235        return true;
1236}
1237
1238static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1239{
1240        struct sock_filter *old_prog;
1241        struct bpf_prog *old_fp;
1242        int err, new_len, old_len = fp->len;
1243        bool seen_ld_abs = false;
1244
1245        /* We are free to overwrite insns et al right here as it
1246         * won't be used at this point in time anymore internally
1247         * after the migration to the internal BPF instruction
1248         * representation.
1249         */
1250        BUILD_BUG_ON(sizeof(struct sock_filter) !=
1251                     sizeof(struct bpf_insn));
1252
1253        /* Conversion cannot happen on overlapping memory areas,
1254         * so we need to keep the user BPF around until the 2nd
1255         * pass. At this time, the user BPF is stored in fp->insns.
1256         */
1257        old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1258                           GFP_KERNEL | __GFP_NOWARN);
1259        if (!old_prog) {
1260                err = -ENOMEM;
1261                goto out_err;
1262        }
1263
1264        /* 1st pass: calculate the new program length. */
1265        err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
1266                                 &seen_ld_abs);
1267        if (err)
1268                goto out_err_free;
1269
1270        /* Expand fp for appending the new filter representation. */
1271        old_fp = fp;
1272        fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
1273        if (!fp) {
1274                /* The old_fp is still around in case we couldn't
1275                 * allocate new memory, so uncharge on that one.
1276                 */
1277                fp = old_fp;
1278                err = -ENOMEM;
1279                goto out_err_free;
1280        }
1281
1282        fp->len = new_len;
1283
1284        /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
1285        err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
1286                                 &seen_ld_abs);
1287        if (err)
1288                /* 2nd bpf_convert_filter() can fail only if it fails
1289                 * to allocate memory, remapping must succeed. Note,
1290                 * that at this time old_fp has already been released
1291                 * by krealloc().
1292                 */
1293                goto out_err_free;
1294
1295        fp = bpf_prog_select_runtime(fp, &err);
1296        if (err)
1297                goto out_err_free;
1298
1299        kfree(old_prog);
1300        return fp;
1301
1302out_err_free:
1303        kfree(old_prog);
1304out_err:
1305        __bpf_prog_release(fp);
1306        return ERR_PTR(err);
1307}
1308
1309static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1310                                           bpf_aux_classic_check_t trans)
1311{
1312        int err;
1313
1314        fp->bpf_func = NULL;
1315        fp->jited = 0;
1316
1317        err = bpf_check_classic(fp->insns, fp->len);
1318        if (err) {
1319                __bpf_prog_release(fp);
1320                return ERR_PTR(err);
1321        }
1322
1323        /* There might be additional checks and transformations
1324         * needed on classic filters, f.e. in case of seccomp.
1325         */
1326        if (trans) {
1327                err = trans(fp->insns, fp->len);
1328                if (err) {
1329                        __bpf_prog_release(fp);
1330                        return ERR_PTR(err);
1331                }
1332        }
1333
1334        /* Probe if we can JIT compile the filter and if so, do
1335         * the compilation of the filter.
1336         */
1337        bpf_jit_compile(fp);
1338
1339        /* JIT compiler couldn't process this filter, so do the
1340         * internal BPF translation for the optimized interpreter.
1341         */
1342        if (!fp->jited)
1343                fp = bpf_migrate_filter(fp);
1344
1345        return fp;
1346}
1347
1348/**
1349 *      bpf_prog_create - create an unattached filter
1350 *      @pfp: the unattached filter that is created
1351 *      @fprog: the filter program
1352 *
1353 * Create a filter independent of any socket. We first run some
1354 * sanity checks on it to make sure it does not explode on us later.
1355 * If an error occurs or there is insufficient memory for the filter
1356 * a negative errno code is returned. On success the return is zero.
1357 */
1358int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1359{
1360        unsigned int fsize = bpf_classic_proglen(fprog);
1361        struct bpf_prog *fp;
1362
1363        /* Make sure new filter is there and in the right amounts. */
1364        if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1365                return -EINVAL;
1366
1367        fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1368        if (!fp)
1369                return -ENOMEM;
1370
1371        memcpy(fp->insns, fprog->filter, fsize);
1372
1373        fp->len = fprog->len;
1374        /* Since unattached filters are not copied back to user
1375         * space through sk_get_filter(), we do not need to hold
1376         * a copy here, and can spare us the work.
1377         */
1378        fp->orig_prog = NULL;
1379
1380        /* bpf_prepare_filter() already takes care of freeing
1381         * memory in case something goes wrong.
1382         */
1383        fp = bpf_prepare_filter(fp, NULL);
1384        if (IS_ERR(fp))
1385                return PTR_ERR(fp);
1386
1387        *pfp = fp;
1388        return 0;
1389}
1390EXPORT_SYMBOL_GPL(bpf_prog_create);
1391
1392/**
1393 *      bpf_prog_create_from_user - create an unattached filter from user buffer
1394 *      @pfp: the unattached filter that is created
1395 *      @fprog: the filter program
1396 *      @trans: post-classic verifier transformation handler
1397 *      @save_orig: save classic BPF program
1398 *
1399 * This function effectively does the same as bpf_prog_create(), only
1400 * that it builds up its insns buffer from user space provided buffer.
1401 * It also allows for passing a bpf_aux_classic_check_t handler.
1402 */
1403int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1404                              bpf_aux_classic_check_t trans, bool save_orig)
1405{
1406        unsigned int fsize = bpf_classic_proglen(fprog);
1407        struct bpf_prog *fp;
1408        int err;
1409
1410        /* Make sure new filter is there and in the right amounts. */
1411        if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1412                return -EINVAL;
1413
1414        fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1415        if (!fp)
1416                return -ENOMEM;
1417
1418        if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1419                __bpf_prog_free(fp);
1420                return -EFAULT;
1421        }
1422
1423        fp->len = fprog->len;
1424        fp->orig_prog = NULL;
1425
1426        if (save_orig) {
1427                err = bpf_prog_store_orig_filter(fp, fprog);
1428                if (err) {
1429                        __bpf_prog_free(fp);
1430                        return -ENOMEM;
1431                }
1432        }
1433
1434        /* bpf_prepare_filter() already takes care of freeing
1435         * memory in case something goes wrong.
1436         */
1437        fp = bpf_prepare_filter(fp, trans);
1438        if (IS_ERR(fp))
1439                return PTR_ERR(fp);
1440
1441        *pfp = fp;
1442        return 0;
1443}
1444EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
1445
1446void bpf_prog_destroy(struct bpf_prog *fp)
1447{
1448        __bpf_prog_release(fp);
1449}
1450EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1451
1452static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1453{
1454        struct sk_filter *fp, *old_fp;
1455
1456        fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1457        if (!fp)
1458                return -ENOMEM;
1459
1460        fp->prog = prog;
1461
1462        if (!__sk_filter_charge(sk, fp)) {
1463                kfree(fp);
1464                return -ENOMEM;
1465        }
1466        refcount_set(&fp->refcnt, 1);
1467
1468        old_fp = rcu_dereference_protected(sk->sk_filter,
1469                                           lockdep_sock_is_held(sk));
1470        rcu_assign_pointer(sk->sk_filter, fp);
1471
1472        if (old_fp)
1473                sk_filter_uncharge(sk, old_fp);
1474
1475        return 0;
1476}
1477
1478static
1479struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1480{
1481        unsigned int fsize = bpf_classic_proglen(fprog);
1482        struct bpf_prog *prog;
1483        int err;
1484
1485        if (sock_flag(sk, SOCK_FILTER_LOCKED))
1486                return ERR_PTR(-EPERM);
1487
1488        /* Make sure new filter is there and in the right amounts. */
1489        if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1490                return ERR_PTR(-EINVAL);
1491
1492        prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1493        if (!prog)
1494                return ERR_PTR(-ENOMEM);
1495
1496        if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1497                __bpf_prog_free(prog);
1498                return ERR_PTR(-EFAULT);
1499        }
1500
1501        prog->len = fprog->len;
1502
1503        err = bpf_prog_store_orig_filter(prog, fprog);
1504        if (err) {
1505                __bpf_prog_free(prog);
1506                return ERR_PTR(-ENOMEM);
1507        }
1508
1509        /* bpf_prepare_filter() already takes care of freeing
1510         * memory in case something goes wrong.
1511         */
1512        return bpf_prepare_filter(prog, NULL);
1513}
1514
1515/**
1516 *      sk_attach_filter - attach a socket filter
1517 *      @fprog: the filter program
1518 *      @sk: the socket to use
1519 *
1520 * Attach the user's filter code. We first run some sanity checks on
1521 * it to make sure it does not explode on us later. If an error
1522 * occurs or there is insufficient memory for the filter a negative
1523 * errno code is returned. On success the return is zero.
1524 */
1525int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1526{
1527        struct bpf_prog *prog = __get_filter(fprog, sk);
1528        int err;
1529
1530        if (IS_ERR(prog))
1531                return PTR_ERR(prog);
1532
1533        err = __sk_attach_prog(prog, sk);
1534        if (err < 0) {
1535                __bpf_prog_release(prog);
1536                return err;
1537        }
1538
1539        return 0;
1540}
1541EXPORT_SYMBOL_GPL(sk_attach_filter);
1542
1543int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1544{
1545        struct bpf_prog *prog = __get_filter(fprog, sk);
1546        int err;
1547
1548        if (IS_ERR(prog))
1549                return PTR_ERR(prog);
1550
1551        if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1552                err = -ENOMEM;
1553        else
1554                err = reuseport_attach_prog(sk, prog);
1555
1556        if (err)
1557                __bpf_prog_release(prog);
1558
1559        return err;
1560}
1561
1562static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1563{
1564        if (sock_flag(sk, SOCK_FILTER_LOCKED))
1565                return ERR_PTR(-EPERM);
1566
1567        return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1568}
1569
1570int sk_attach_bpf(u32 ufd, struct sock *sk)
1571{
1572        struct bpf_prog *prog = __get_bpf(ufd, sk);
1573        int err;
1574
1575        if (IS_ERR(prog))
1576                return PTR_ERR(prog);
1577
1578        err = __sk_attach_prog(prog, sk);
1579        if (err < 0) {
1580                bpf_prog_put(prog);
1581                return err;
1582        }
1583
1584        return 0;
1585}
1586
1587int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1588{
1589        struct bpf_prog *prog;
1590        int err;
1591
1592        if (sock_flag(sk, SOCK_FILTER_LOCKED))
1593                return -EPERM;
1594
1595        prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1596        if (PTR_ERR(prog) == -EINVAL)
1597                prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
1598        if (IS_ERR(prog))
1599                return PTR_ERR(prog);
1600
1601        if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
1602                /* Like other non BPF_PROG_TYPE_SOCKET_FILTER
1603                 * bpf prog (e.g. sockmap).  It depends on the
1604                 * limitation imposed by bpf_prog_load().
1605                 * Hence, sysctl_optmem_max is not checked.
1606                 */
1607                if ((sk->sk_type != SOCK_STREAM &&
1608                     sk->sk_type != SOCK_DGRAM) ||
1609                    (sk->sk_protocol != IPPROTO_UDP &&
1610                     sk->sk_protocol != IPPROTO_TCP) ||
1611                    (sk->sk_family != AF_INET &&
1612                     sk->sk_family != AF_INET6)) {
1613                        err = -ENOTSUPP;
1614                        goto err_prog_put;
1615                }
1616        } else {
1617                /* BPF_PROG_TYPE_SOCKET_FILTER */
1618                if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
1619                        err = -ENOMEM;
1620                        goto err_prog_put;
1621                }
1622        }
1623
1624        err = reuseport_attach_prog(sk, prog);
1625err_prog_put:
1626        if (err)
1627                bpf_prog_put(prog);
1628
1629        return err;
1630}
1631
1632void sk_reuseport_prog_free(struct bpf_prog *prog)
1633{
1634        if (!prog)
1635                return;
1636
1637        if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1638                bpf_prog_put(prog);
1639        else
1640                bpf_prog_destroy(prog);
1641}
1642
1643struct bpf_scratchpad {
1644        union {
1645                __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1646                u8     buff[MAX_BPF_STACK];
1647        };
1648};
1649
1650static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1651
1652static inline int __bpf_try_make_writable(struct sk_buff *skb,
1653                                          unsigned int write_len)
1654{
1655        return skb_ensure_writable(skb, write_len);
1656}
1657
1658static inline int bpf_try_make_writable(struct sk_buff *skb,
1659                                        unsigned int write_len)
1660{
1661        int err = __bpf_try_make_writable(skb, write_len);
1662
1663        bpf_compute_data_pointers(skb);
1664        return err;
1665}
1666
1667static int bpf_try_make_head_writable(struct sk_buff *skb)
1668{
1669        return bpf_try_make_writable(skb, skb_headlen(skb));
1670}
1671
1672static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1673{
1674        if (skb_at_tc_ingress(skb))
1675                skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1676}
1677
1678static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1679{
1680        if (skb_at_tc_ingress(skb))
1681                skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1682}
1683
1684BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1685           const void *, from, u32, len, u64, flags)
1686{
1687        void *ptr;
1688
1689        if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1690                return -EINVAL;
1691        if (unlikely(offset > 0xffff))
1692                return -EFAULT;
1693        if (unlikely(bpf_try_make_writable(skb, offset + len)))
1694                return -EFAULT;
1695
1696        ptr = skb->data + offset;
1697        if (flags & BPF_F_RECOMPUTE_CSUM)
1698                __skb_postpull_rcsum(skb, ptr, len, offset);
1699
1700        memcpy(ptr, from, len);
1701
1702        if (flags & BPF_F_RECOMPUTE_CSUM)
1703                __skb_postpush_rcsum(skb, ptr, len, offset);
1704        if (flags & BPF_F_INVALIDATE_HASH)
1705                skb_clear_hash(skb);
1706
1707        return 0;
1708}
1709
1710static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1711        .func           = bpf_skb_store_bytes,
1712        .gpl_only       = false,
1713        .ret_type       = RET_INTEGER,
1714        .arg1_type      = ARG_PTR_TO_CTX,
1715        .arg2_type      = ARG_ANYTHING,
1716        .arg3_type      = ARG_PTR_TO_MEM,
1717        .arg4_type      = ARG_CONST_SIZE,
1718        .arg5_type      = ARG_ANYTHING,
1719};
1720
1721BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1722           void *, to, u32, len)
1723{
1724        void *ptr;
1725
1726        if (unlikely(offset > 0xffff))
1727                goto err_clear;
1728
1729        ptr = skb_header_pointer(skb, offset, len, to);
1730        if (unlikely(!ptr))
1731                goto err_clear;
1732        if (ptr != to)
1733                memcpy(to, ptr, len);
1734
1735        return 0;
1736err_clear:
1737        memset(to, 0, len);
1738        return -EFAULT;
1739}
1740
1741static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1742        .func           = bpf_skb_load_bytes,
1743        .gpl_only       = false,
1744        .ret_type       = RET_INTEGER,
1745        .arg1_type      = ARG_PTR_TO_CTX,
1746        .arg2_type      = ARG_ANYTHING,
1747        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
1748        .arg4_type      = ARG_CONST_SIZE,
1749};
1750
1751BPF_CALL_4(bpf_flow_dissector_load_bytes,
1752           const struct bpf_flow_dissector *, ctx, u32, offset,
1753           void *, to, u32, len)
1754{
1755        void *ptr;
1756
1757        if (unlikely(offset > 0xffff))
1758                goto err_clear;
1759
1760        if (unlikely(!ctx->skb))
1761                goto err_clear;
1762
1763        ptr = skb_header_pointer(ctx->skb, offset, len, to);
1764        if (unlikely(!ptr))
1765                goto err_clear;
1766        if (ptr != to)
1767                memcpy(to, ptr, len);
1768
1769        return 0;
1770err_clear:
1771        memset(to, 0, len);
1772        return -EFAULT;
1773}
1774
1775static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1776        .func           = bpf_flow_dissector_load_bytes,
1777        .gpl_only       = false,
1778        .ret_type       = RET_INTEGER,
1779        .arg1_type      = ARG_PTR_TO_CTX,
1780        .arg2_type      = ARG_ANYTHING,
1781        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
1782        .arg4_type      = ARG_CONST_SIZE,
1783};
1784
1785BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1786           u32, offset, void *, to, u32, len, u32, start_header)
1787{
1788        u8 *end = skb_tail_pointer(skb);
1789        u8 *start, *ptr;
1790
1791        if (unlikely(offset > 0xffff))
1792                goto err_clear;
1793
1794        switch (start_header) {
1795        case BPF_HDR_START_MAC:
1796                if (unlikely(!skb_mac_header_was_set(skb)))
1797                        goto err_clear;
1798                start = skb_mac_header(skb);
1799                break;
1800        case BPF_HDR_START_NET:
1801                start = skb_network_header(skb);
1802                break;
1803        default:
1804                goto err_clear;
1805        }
1806
1807        ptr = start + offset;
1808
1809        if (likely(ptr + len <= end)) {
1810                memcpy(to, ptr, len);
1811                return 0;
1812        }
1813
1814err_clear:
1815        memset(to, 0, len);
1816        return -EFAULT;
1817}
1818
1819static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1820        .func           = bpf_skb_load_bytes_relative,
1821        .gpl_only       = false,
1822        .ret_type       = RET_INTEGER,
1823        .arg1_type      = ARG_PTR_TO_CTX,
1824        .arg2_type      = ARG_ANYTHING,
1825        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
1826        .arg4_type      = ARG_CONST_SIZE,
1827        .arg5_type      = ARG_ANYTHING,
1828};
1829
1830BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1831{
1832        /* Idea is the following: should the needed direct read/write
1833         * test fail during runtime, we can pull in more data and redo
1834         * again, since implicitly, we invalidate previous checks here.
1835         *
1836         * Or, since we know how much we need to make read/writeable,
1837         * this can be done once at the program beginning for direct
1838         * access case. By this we overcome limitations of only current
1839         * headroom being accessible.
1840         */
1841        return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1842}
1843
1844static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1845        .func           = bpf_skb_pull_data,
1846        .gpl_only       = false,
1847        .ret_type       = RET_INTEGER,
1848        .arg1_type      = ARG_PTR_TO_CTX,
1849        .arg2_type      = ARG_ANYTHING,
1850};
1851
1852BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1853{
1854        return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1855}
1856
1857static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1858        .func           = bpf_sk_fullsock,
1859        .gpl_only       = false,
1860        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
1861        .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
1862};
1863
1864static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1865                                           unsigned int write_len)
1866{
1867        return __bpf_try_make_writable(skb, write_len);
1868}
1869
1870BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1871{
1872        /* Idea is the following: should the needed direct read/write
1873         * test fail during runtime, we can pull in more data and redo
1874         * again, since implicitly, we invalidate previous checks here.
1875         *
1876         * Or, since we know how much we need to make read/writeable,
1877         * this can be done once at the program beginning for direct
1878         * access case. By this we overcome limitations of only current
1879         * headroom being accessible.
1880         */
1881        return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1882}
1883
1884static const struct bpf_func_proto sk_skb_pull_data_proto = {
1885        .func           = sk_skb_pull_data,
1886        .gpl_only       = false,
1887        .ret_type       = RET_INTEGER,
1888        .arg1_type      = ARG_PTR_TO_CTX,
1889        .arg2_type      = ARG_ANYTHING,
1890};
1891
1892BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1893           u64, from, u64, to, u64, flags)
1894{
1895        __sum16 *ptr;
1896
1897        if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1898                return -EINVAL;
1899        if (unlikely(offset > 0xffff || offset & 1))
1900                return -EFAULT;
1901        if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1902                return -EFAULT;
1903
1904        ptr = (__sum16 *)(skb->data + offset);
1905        switch (flags & BPF_F_HDR_FIELD_MASK) {
1906        case 0:
1907                if (unlikely(from != 0))
1908                        return -EINVAL;
1909
1910                csum_replace_by_diff(ptr, to);
1911                break;
1912        case 2:
1913                csum_replace2(ptr, from, to);
1914                break;
1915        case 4:
1916                csum_replace4(ptr, from, to);
1917                break;
1918        default:
1919                return -EINVAL;
1920        }
1921
1922        return 0;
1923}
1924
1925static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1926        .func           = bpf_l3_csum_replace,
1927        .gpl_only       = false,
1928        .ret_type       = RET_INTEGER,
1929        .arg1_type      = ARG_PTR_TO_CTX,
1930        .arg2_type      = ARG_ANYTHING,
1931        .arg3_type      = ARG_ANYTHING,
1932        .arg4_type      = ARG_ANYTHING,
1933        .arg5_type      = ARG_ANYTHING,
1934};
1935
1936BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1937           u64, from, u64, to, u64, flags)
1938{
1939        bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1940        bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1941        bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1942        __sum16 *ptr;
1943
1944        if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1945                               BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
1946                return -EINVAL;
1947        if (unlikely(offset > 0xffff || offset & 1))
1948                return -EFAULT;
1949        if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1950                return -EFAULT;
1951
1952        ptr = (__sum16 *)(skb->data + offset);
1953        if (is_mmzero && !do_mforce && !*ptr)
1954                return 0;
1955
1956        switch (flags & BPF_F_HDR_FIELD_MASK) {
1957        case 0:
1958                if (unlikely(from != 0))
1959                        return -EINVAL;
1960
1961                inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1962                break;
1963        case 2:
1964                inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1965                break;
1966        case 4:
1967                inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1968                break;
1969        default:
1970                return -EINVAL;
1971        }
1972
1973        if (is_mmzero && !*ptr)
1974                *ptr = CSUM_MANGLED_0;
1975        return 0;
1976}
1977
1978static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1979        .func           = bpf_l4_csum_replace,
1980        .gpl_only       = false,
1981        .ret_type       = RET_INTEGER,
1982        .arg1_type      = ARG_PTR_TO_CTX,
1983        .arg2_type      = ARG_ANYTHING,
1984        .arg3_type      = ARG_ANYTHING,
1985        .arg4_type      = ARG_ANYTHING,
1986        .arg5_type      = ARG_ANYTHING,
1987};
1988
1989BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1990           __be32 *, to, u32, to_size, __wsum, seed)
1991{
1992        struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1993        u32 diff_size = from_size + to_size;
1994        int i, j = 0;
1995
1996        /* This is quite flexible, some examples:
1997         *
1998         * from_size == 0, to_size > 0,  seed := csum --> pushing data
1999         * from_size > 0,  to_size == 0, seed := csum --> pulling data
2000         * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
2001         *
2002         * Even for diffing, from_size and to_size don't need to be equal.
2003         */
2004        if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
2005                     diff_size > sizeof(sp->diff)))
2006                return -EINVAL;
2007
2008        for (i = 0; i < from_size / sizeof(__be32); i++, j++)
2009                sp->diff[j] = ~from[i];
2010        for (i = 0; i <   to_size / sizeof(__be32); i++, j++)
2011                sp->diff[j] = to[i];
2012
2013        return csum_partial(sp->diff, diff_size, seed);
2014}
2015
2016static const struct bpf_func_proto bpf_csum_diff_proto = {
2017        .func           = bpf_csum_diff,
2018        .gpl_only       = false,
2019        .pkt_access     = true,
2020        .ret_type       = RET_INTEGER,
2021        .arg1_type      = ARG_PTR_TO_MEM_OR_NULL,
2022        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
2023        .arg3_type      = ARG_PTR_TO_MEM_OR_NULL,
2024        .arg4_type      = ARG_CONST_SIZE_OR_ZERO,
2025        .arg5_type      = ARG_ANYTHING,
2026};
2027
2028BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2029{
2030        /* The interface is to be used in combination with bpf_csum_diff()
2031         * for direct packet writes. csum rotation for alignment as well
2032         * as emulating csum_sub() can be done from the eBPF program.
2033         */
2034        if (skb->ip_summed == CHECKSUM_COMPLETE)
2035                return (skb->csum = csum_add(skb->csum, csum));
2036
2037        return -ENOTSUPP;
2038}
2039
2040static const struct bpf_func_proto bpf_csum_update_proto = {
2041        .func           = bpf_csum_update,
2042        .gpl_only       = false,
2043        .ret_type       = RET_INTEGER,
2044        .arg1_type      = ARG_PTR_TO_CTX,
2045        .arg2_type      = ARG_ANYTHING,
2046};
2047
2048BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2049{
2050        /* The interface is to be used in combination with bpf_skb_adjust_room()
2051         * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
2052         * is passed as flags, for example.
2053         */
2054        switch (level) {
2055        case BPF_CSUM_LEVEL_INC:
2056                __skb_incr_checksum_unnecessary(skb);
2057                break;
2058        case BPF_CSUM_LEVEL_DEC:
2059                __skb_decr_checksum_unnecessary(skb);
2060                break;
2061        case BPF_CSUM_LEVEL_RESET:
2062                __skb_reset_checksum_unnecessary(skb);
2063                break;
2064        case BPF_CSUM_LEVEL_QUERY:
2065                return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2066                       skb->csum_level : -EACCES;
2067        default:
2068                return -EINVAL;
2069        }
2070
2071        return 0;
2072}
2073
2074static const struct bpf_func_proto bpf_csum_level_proto = {
2075        .func           = bpf_csum_level,
2076        .gpl_only       = false,
2077        .ret_type       = RET_INTEGER,
2078        .arg1_type      = ARG_PTR_TO_CTX,
2079        .arg2_type      = ARG_ANYTHING,
2080};
2081
2082static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
2083{
2084        return dev_forward_skb_nomtu(dev, skb);
2085}
2086
2087static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2088                                      struct sk_buff *skb)
2089{
2090        int ret = ____dev_forward_skb(dev, skb, false);
2091
2092        if (likely(!ret)) {
2093                skb->dev = dev;
2094                ret = netif_rx(skb);
2095        }
2096
2097        return ret;
2098}
2099
2100static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2101{
2102        int ret;
2103
2104        if (dev_xmit_recursion()) {
2105                net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2106                kfree_skb(skb);
2107                return -ENETDOWN;
2108        }
2109
2110        skb->dev = dev;
2111        skb->tstamp = 0;
2112
2113        dev_xmit_recursion_inc();
2114        ret = dev_queue_xmit(skb);
2115        dev_xmit_recursion_dec();
2116
2117        return ret;
2118}
2119
2120static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2121                                 u32 flags)
2122{
2123        unsigned int mlen = skb_network_offset(skb);
2124
2125        if (mlen) {
2126                __skb_pull(skb, mlen);
2127
2128                /* At ingress, the mac header has already been pulled once.
2129                 * At egress, skb_pospull_rcsum has to be done in case that
2130                 * the skb is originated from ingress (i.e. a forwarded skb)
2131                 * to ensure that rcsum starts at net header.
2132                 */
2133                if (!skb_at_tc_ingress(skb))
2134                        skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2135        }
2136        skb_pop_mac_header(skb);
2137        skb_reset_mac_len(skb);
2138        return flags & BPF_F_INGRESS ?
2139               __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2140}
2141
2142static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2143                                 u32 flags)
2144{
2145        /* Verify that a link layer header is carried */
2146        if (unlikely(skb->mac_header >= skb->network_header)) {
2147                kfree_skb(skb);
2148                return -ERANGE;
2149        }
2150
2151        bpf_push_mac_rcsum(skb);
2152        return flags & BPF_F_INGRESS ?
2153               __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2154}
2155
2156static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2157                          u32 flags)
2158{
2159        if (dev_is_mac_header_xmit(dev))
2160                return __bpf_redirect_common(skb, dev, flags);
2161        else
2162                return __bpf_redirect_no_mac(skb, dev, flags);
2163}
2164
2165#if IS_ENABLED(CONFIG_IPV6)
2166static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
2167                            struct net_device *dev, struct bpf_nh_params *nh)
2168{
2169        u32 hh_len = LL_RESERVED_SPACE(dev);
2170        const struct in6_addr *nexthop;
2171        struct dst_entry *dst = NULL;
2172        struct neighbour *neigh;
2173
2174        if (dev_xmit_recursion()) {
2175                net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2176                goto out_drop;
2177        }
2178
2179        skb->dev = dev;
2180        skb->tstamp = 0;
2181
2182        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2183                skb = skb_expand_head(skb, hh_len);
2184                if (!skb)
2185                        return -ENOMEM;
2186        }
2187
2188        rcu_read_lock_bh();
2189        if (!nh) {
2190                dst = skb_dst(skb);
2191                nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2192                                      &ipv6_hdr(skb)->daddr);
2193        } else {
2194                nexthop = &nh->ipv6_nh;
2195        }
2196        neigh = ip_neigh_gw6(dev, nexthop);
2197        if (likely(!IS_ERR(neigh))) {
2198                int ret;
2199
2200                sock_confirm_neigh(skb, neigh);
2201                dev_xmit_recursion_inc();
2202                ret = neigh_output(neigh, skb, false);
2203                dev_xmit_recursion_dec();
2204                rcu_read_unlock_bh();
2205                return ret;
2206        }
2207        rcu_read_unlock_bh();
2208        if (dst)
2209                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
2210out_drop:
2211        kfree_skb(skb);
2212        return -ENETDOWN;
2213}
2214
2215static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2216                                   struct bpf_nh_params *nh)
2217{
2218        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
2219        struct net *net = dev_net(dev);
2220        int err, ret = NET_XMIT_DROP;
2221
2222        if (!nh) {
2223                struct dst_entry *dst;
2224                struct flowi6 fl6 = {
2225                        .flowi6_flags = FLOWI_FLAG_ANYSRC,
2226                        .flowi6_mark  = skb->mark,
2227                        .flowlabel    = ip6_flowinfo(ip6h),
2228                        .flowi6_oif   = dev->ifindex,
2229                        .flowi6_proto = ip6h->nexthdr,
2230                        .daddr        = ip6h->daddr,
2231                        .saddr        = ip6h->saddr,
2232                };
2233
2234                dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2235                if (IS_ERR(dst))
2236                        goto out_drop;
2237
2238                skb_dst_set(skb, dst);
2239        } else if (nh->nh_family != AF_INET6) {
2240                goto out_drop;
2241        }
2242
2243        err = bpf_out_neigh_v6(net, skb, dev, nh);
2244        if (unlikely(net_xmit_eval(err)))
2245                dev->stats.tx_errors++;
2246        else
2247                ret = NET_XMIT_SUCCESS;
2248        goto out_xmit;
2249out_drop:
2250        dev->stats.tx_errors++;
2251        kfree_skb(skb);
2252out_xmit:
2253        return ret;
2254}
2255#else
2256static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2257                                   struct bpf_nh_params *nh)
2258{
2259        kfree_skb(skb);
2260        return NET_XMIT_DROP;
2261}
2262#endif /* CONFIG_IPV6 */
2263
2264#if IS_ENABLED(CONFIG_INET)
2265static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
2266                            struct net_device *dev, struct bpf_nh_params *nh)
2267{
2268        u32 hh_len = LL_RESERVED_SPACE(dev);
2269        struct neighbour *neigh;
2270        bool is_v6gw = false;
2271
2272        if (dev_xmit_recursion()) {
2273                net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2274                goto out_drop;
2275        }
2276
2277        skb->dev = dev;
2278        skb->tstamp = 0;
2279
2280        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2281                skb = skb_expand_head(skb, hh_len);
2282                if (!skb)
2283                        return -ENOMEM;
2284        }
2285
2286        rcu_read_lock_bh();
2287        if (!nh) {
2288                struct dst_entry *dst = skb_dst(skb);
2289                struct rtable *rt = container_of(dst, struct rtable, dst);
2290
2291                neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
2292        } else if (nh->nh_family == AF_INET6) {
2293                neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
2294                is_v6gw = true;
2295        } else if (nh->nh_family == AF_INET) {
2296                neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
2297        } else {
2298                rcu_read_unlock_bh();
2299                goto out_drop;
2300        }
2301
2302        if (likely(!IS_ERR(neigh))) {
2303                int ret;
2304
2305                sock_confirm_neigh(skb, neigh);
2306                dev_xmit_recursion_inc();
2307                ret = neigh_output(neigh, skb, is_v6gw);
2308                dev_xmit_recursion_dec();
2309                rcu_read_unlock_bh();
2310                return ret;
2311        }
2312        rcu_read_unlock_bh();
2313out_drop:
2314        kfree_skb(skb);
2315        return -ENETDOWN;
2316}
2317
2318static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2319                                   struct bpf_nh_params *nh)
2320{
2321        const struct iphdr *ip4h = ip_hdr(skb);
2322        struct net *net = dev_net(dev);
2323        int err, ret = NET_XMIT_DROP;
2324
2325        if (!nh) {
2326                struct flowi4 fl4 = {
2327                        .flowi4_flags = FLOWI_FLAG_ANYSRC,
2328                        .flowi4_mark  = skb->mark,
2329                        .flowi4_tos   = RT_TOS(ip4h->tos),
2330                        .flowi4_oif   = dev->ifindex,
2331                        .flowi4_proto = ip4h->protocol,
2332                        .daddr        = ip4h->daddr,
2333                        .saddr        = ip4h->saddr,
2334                };
2335                struct rtable *rt;
2336
2337                rt = ip_route_output_flow(net, &fl4, NULL);
2338                if (IS_ERR(rt))
2339                        goto out_drop;
2340                if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2341                        ip_rt_put(rt);
2342                        goto out_drop;
2343                }
2344
2345                skb_dst_set(skb, &rt->dst);
2346        }
2347
2348        err = bpf_out_neigh_v4(net, skb, dev, nh);
2349        if (unlikely(net_xmit_eval(err)))
2350                dev->stats.tx_errors++;
2351        else
2352                ret = NET_XMIT_SUCCESS;
2353        goto out_xmit;
2354out_drop:
2355        dev->stats.tx_errors++;
2356        kfree_skb(skb);
2357out_xmit:
2358        return ret;
2359}
2360#else
2361static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2362                                   struct bpf_nh_params *nh)
2363{
2364        kfree_skb(skb);
2365        return NET_XMIT_DROP;
2366}
2367#endif /* CONFIG_INET */
2368
2369static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
2370                                struct bpf_nh_params *nh)
2371{
2372        struct ethhdr *ethh = eth_hdr(skb);
2373
2374        if (unlikely(skb->mac_header >= skb->network_header))
2375                goto out;
2376        bpf_push_mac_rcsum(skb);
2377        if (is_multicast_ether_addr(ethh->h_dest))
2378                goto out;
2379
2380        skb_pull(skb, sizeof(*ethh));
2381        skb_unset_mac_header(skb);
2382        skb_reset_network_header(skb);
2383
2384        if (skb->protocol == htons(ETH_P_IP))
2385                return __bpf_redirect_neigh_v4(skb, dev, nh);
2386        else if (skb->protocol == htons(ETH_P_IPV6))
2387                return __bpf_redirect_neigh_v6(skb, dev, nh);
2388out:
2389        kfree_skb(skb);
2390        return -ENOTSUPP;
2391}
2392
2393/* Internal, non-exposed redirect flags. */
2394enum {
2395        BPF_F_NEIGH     = (1ULL << 1),
2396        BPF_F_PEER      = (1ULL << 2),
2397        BPF_F_NEXTHOP   = (1ULL << 3),
2398#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
2399};
2400
2401BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
2402{
2403        struct net_device *dev;
2404        struct sk_buff *clone;
2405        int ret;
2406
2407        if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2408                return -EINVAL;
2409
2410        dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2411        if (unlikely(!dev))
2412                return -EINVAL;
2413
2414        clone = skb_clone(skb, GFP_ATOMIC);
2415        if (unlikely(!clone))
2416                return -ENOMEM;
2417
2418        /* For direct write, we need to keep the invariant that the skbs
2419         * we're dealing with need to be uncloned. Should uncloning fail
2420         * here, we need to free the just generated clone to unclone once
2421         * again.
2422         */
2423        ret = bpf_try_make_head_writable(skb);
2424        if (unlikely(ret)) {
2425                kfree_skb(clone);
2426                return -ENOMEM;
2427        }
2428
2429        return __bpf_redirect(clone, dev, flags);
2430}
2431
2432static const struct bpf_func_proto bpf_clone_redirect_proto = {
2433        .func           = bpf_clone_redirect,
2434        .gpl_only       = false,
2435        .ret_type       = RET_INTEGER,
2436        .arg1_type      = ARG_PTR_TO_CTX,
2437        .arg2_type      = ARG_ANYTHING,
2438        .arg3_type      = ARG_ANYTHING,
2439};
2440
2441DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2442EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
2443
2444int skb_do_redirect(struct sk_buff *skb)
2445{
2446        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2447        struct net *net = dev_net(skb->dev);
2448        struct net_device *dev;
2449        u32 flags = ri->flags;
2450
2451        dev = dev_get_by_index_rcu(net, ri->tgt_index);
2452        ri->tgt_index = 0;
2453        ri->flags = 0;
2454        if (unlikely(!dev))
2455                goto out_drop;
2456        if (flags & BPF_F_PEER) {
2457                const struct net_device_ops *ops = dev->netdev_ops;
2458
2459                if (unlikely(!ops->ndo_get_peer_dev ||
2460                             !skb_at_tc_ingress(skb)))
2461                        goto out_drop;
2462                dev = ops->ndo_get_peer_dev(dev);
2463                if (unlikely(!dev ||
2464                             !(dev->flags & IFF_UP) ||
2465                             net_eq(net, dev_net(dev))))
2466                        goto out_drop;
2467                skb->dev = dev;
2468                return -EAGAIN;
2469        }
2470        return flags & BPF_F_NEIGH ?
2471               __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
2472                                    &ri->nh : NULL) :
2473               __bpf_redirect(skb, dev, flags);
2474out_drop:
2475        kfree_skb(skb);
2476        return -EINVAL;
2477}
2478
2479BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2480{
2481        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2482
2483        if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2484                return TC_ACT_SHOT;
2485
2486        ri->flags = flags;
2487        ri->tgt_index = ifindex;
2488
2489        return TC_ACT_REDIRECT;
2490}
2491
2492static const struct bpf_func_proto bpf_redirect_proto = {
2493        .func           = bpf_redirect,
2494        .gpl_only       = false,
2495        .ret_type       = RET_INTEGER,
2496        .arg1_type      = ARG_ANYTHING,
2497        .arg2_type      = ARG_ANYTHING,
2498};
2499
2500BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
2501{
2502        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2503
2504        if (unlikely(flags))
2505                return TC_ACT_SHOT;
2506
2507        ri->flags = BPF_F_PEER;
2508        ri->tgt_index = ifindex;
2509
2510        return TC_ACT_REDIRECT;
2511}
2512
2513static const struct bpf_func_proto bpf_redirect_peer_proto = {
2514        .func           = bpf_redirect_peer,
2515        .gpl_only       = false,
2516        .ret_type       = RET_INTEGER,
2517        .arg1_type      = ARG_ANYTHING,
2518        .arg2_type      = ARG_ANYTHING,
2519};
2520
2521BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2522           int, plen, u64, flags)
2523{
2524        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2525
2526        if (unlikely((plen && plen < sizeof(*params)) || flags))
2527                return TC_ACT_SHOT;
2528
2529        ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
2530        ri->tgt_index = ifindex;
2531
2532        BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2533        if (plen)
2534                memcpy(&ri->nh, params, sizeof(ri->nh));
2535
2536        return TC_ACT_REDIRECT;
2537}
2538
2539static const struct bpf_func_proto bpf_redirect_neigh_proto = {
2540        .func           = bpf_redirect_neigh,
2541        .gpl_only       = false,
2542        .ret_type       = RET_INTEGER,
2543        .arg1_type      = ARG_ANYTHING,
2544        .arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
2545        .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
2546        .arg4_type      = ARG_ANYTHING,
2547};
2548
2549BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2550{
2551        msg->apply_bytes = bytes;
2552        return 0;
2553}
2554
2555static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2556        .func           = bpf_msg_apply_bytes,
2557        .gpl_only       = false,
2558        .ret_type       = RET_INTEGER,
2559        .arg1_type      = ARG_PTR_TO_CTX,
2560        .arg2_type      = ARG_ANYTHING,
2561};
2562
2563BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
2564{
2565        msg->cork_bytes = bytes;
2566        return 0;
2567}
2568
2569static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2570        .func           = bpf_msg_cork_bytes,
2571        .gpl_only       = false,
2572        .ret_type       = RET_INTEGER,
2573        .arg1_type      = ARG_PTR_TO_CTX,
2574        .arg2_type      = ARG_ANYTHING,
2575};
2576
2577BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2578           u32, end, u64, flags)
2579{
2580        u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
2581        u32 first_sge, last_sge, i, shift, bytes_sg_total;
2582        struct scatterlist *sge;
2583        u8 *raw, *to, *from;
2584        struct page *page;
2585
2586        if (unlikely(flags || end <= start))
2587                return -EINVAL;
2588
2589        /* First find the starting scatterlist element */
2590        i = msg->sg.start;
2591        do {
2592                offset += len;
2593                len = sk_msg_elem(msg, i)->length;
2594                if (start < offset + len)
2595                        break;
2596                sk_msg_iter_var_next(i);
2597        } while (i != msg->sg.end);
2598
2599        if (unlikely(start >= offset + len))
2600                return -EINVAL;
2601
2602        first_sge = i;
2603        /* The start may point into the sg element so we need to also
2604         * account for the headroom.
2605         */
2606        bytes_sg_total = start - offset + bytes;
2607        if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
2608                goto out;
2609
2610        /* At this point we need to linearize multiple scatterlist
2611         * elements or a single shared page. Either way we need to
2612         * copy into a linear buffer exclusively owned by BPF. Then
2613         * place the buffer in the scatterlist and fixup the original
2614         * entries by removing the entries now in the linear buffer
2615         * and shifting the remaining entries. For now we do not try
2616         * to copy partial entries to avoid complexity of running out
2617         * of sg_entry slots. The downside is reading a single byte
2618         * will copy the entire sg entry.
2619         */
2620        do {
2621                copy += sk_msg_elem(msg, i)->length;
2622                sk_msg_iter_var_next(i);
2623                if (bytes_sg_total <= copy)
2624                        break;
2625        } while (i != msg->sg.end);
2626        last_sge = i;
2627
2628        if (unlikely(bytes_sg_total > copy))
2629                return -EINVAL;
2630
2631        page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2632                           get_order(copy));
2633        if (unlikely(!page))
2634                return -ENOMEM;
2635
2636        raw = page_address(page);
2637        i = first_sge;
2638        do {
2639                sge = sk_msg_elem(msg, i);
2640                from = sg_virt(sge);
2641                len = sge->length;
2642                to = raw + poffset;
2643
2644                memcpy(to, from, len);
2645                poffset += len;
2646                sge->length = 0;
2647                put_page(sg_page(sge));
2648
2649                sk_msg_iter_var_next(i);
2650        } while (i != last_sge);
2651
2652        sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
2653
2654        /* To repair sg ring we need to shift entries. If we only
2655         * had a single entry though we can just replace it and
2656         * be done. Otherwise walk the ring and shift the entries.
2657         */
2658        WARN_ON_ONCE(last_sge == first_sge);
2659        shift = last_sge > first_sge ?
2660                last_sge - first_sge - 1 :
2661                NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
2662        if (!shift)
2663                goto out;
2664
2665        i = first_sge;
2666        sk_msg_iter_var_next(i);
2667        do {
2668                u32 move_from;
2669
2670                if (i + shift >= NR_MSG_FRAG_IDS)
2671                        move_from = i + shift - NR_MSG_FRAG_IDS;
2672                else
2673                        move_from = i + shift;
2674                if (move_from == msg->sg.end)
2675                        break;
2676
2677                msg->sg.data[i] = msg->sg.data[move_from];
2678                msg->sg.data[move_from].length = 0;
2679                msg->sg.data[move_from].page_link = 0;
2680                msg->sg.data[move_from].offset = 0;
2681                sk_msg_iter_var_next(i);
2682        } while (1);
2683
2684        msg->sg.end = msg->sg.end - shift > msg->sg.end ?
2685                      msg->sg.end - shift + NR_MSG_FRAG_IDS :
2686                      msg->sg.end - shift;
2687out:
2688        msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
2689        msg->data_end = msg->data + bytes;
2690        return 0;
2691}
2692
2693static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2694        .func           = bpf_msg_pull_data,
2695        .gpl_only       = false,
2696        .ret_type       = RET_INTEGER,
2697        .arg1_type      = ARG_PTR_TO_CTX,
2698        .arg2_type      = ARG_ANYTHING,
2699        .arg3_type      = ARG_ANYTHING,
2700        .arg4_type      = ARG_ANYTHING,
2701};
2702
2703BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2704           u32, len, u64, flags)
2705{
2706        struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
2707        u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
2708        u8 *raw, *to, *from;
2709        struct page *page;
2710
2711        if (unlikely(flags))
2712                return -EINVAL;
2713
2714        /* First find the starting scatterlist element */
2715        i = msg->sg.start;
2716        do {
2717                offset += l;
2718                l = sk_msg_elem(msg, i)->length;
2719
2720                if (start < offset + l)
2721                        break;
2722                sk_msg_iter_var_next(i);
2723        } while (i != msg->sg.end);
2724
2725        if (start >= offset + l)
2726                return -EINVAL;
2727
2728        space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2729
2730        /* If no space available will fallback to copy, we need at
2731         * least one scatterlist elem available to push data into
2732         * when start aligns to the beginning of an element or two
2733         * when it falls inside an element. We handle the start equals
2734         * offset case because its the common case for inserting a
2735         * header.
2736         */
2737        if (!space || (space == 1 && start != offset))
2738                copy = msg->sg.data[i].length;
2739
2740        page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2741                           get_order(copy + len));
2742        if (unlikely(!page))
2743                return -ENOMEM;
2744
2745        if (copy) {
2746                int front, back;
2747
2748                raw = page_address(page);
2749
2750                psge = sk_msg_elem(msg, i);
2751                front = start - offset;
2752                back = psge->length - front;
2753                from = sg_virt(psge);
2754
2755                if (front)
2756                        memcpy(raw, from, front);
2757
2758                if (back) {
2759                        from += front;
2760                        to = raw + front + len;
2761
2762                        memcpy(to, from, back);
2763                }
2764
2765                put_page(sg_page(psge));
2766        } else if (start - offset) {
2767                psge = sk_msg_elem(msg, i);
2768                rsge = sk_msg_elem_cpy(msg, i);
2769
2770                psge->length = start - offset;
2771                rsge.length -= psge->length;
2772                rsge.offset += start;
2773
2774                sk_msg_iter_var_next(i);
2775                sg_unmark_end(psge);
2776                sg_unmark_end(&rsge);
2777                sk_msg_iter_next(msg, end);
2778        }
2779
2780        /* Slot(s) to place newly allocated data */
2781        new = i;
2782
2783        /* Shift one or two slots as needed */
2784        if (!copy) {
2785                sge = sk_msg_elem_cpy(msg, i);
2786
2787                sk_msg_iter_var_next(i);
2788                sg_unmark_end(&sge);
2789                sk_msg_iter_next(msg, end);
2790
2791                nsge = sk_msg_elem_cpy(msg, i);
2792                if (rsge.length) {
2793                        sk_msg_iter_var_next(i);
2794                        nnsge = sk_msg_elem_cpy(msg, i);
2795                }
2796
2797                while (i != msg->sg.end) {
2798                        msg->sg.data[i] = sge;
2799                        sge = nsge;
2800                        sk_msg_iter_var_next(i);
2801                        if (rsge.length) {
2802                                nsge = nnsge;
2803                                nnsge = sk_msg_elem_cpy(msg, i);
2804                        } else {
2805                                nsge = sk_msg_elem_cpy(msg, i);
2806                        }
2807                }
2808        }
2809
2810        /* Place newly allocated data buffer */
2811        sk_mem_charge(msg->sk, len);
2812        msg->sg.size += len;
2813        __clear_bit(new, &msg->sg.copy);
2814        sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2815        if (rsge.length) {
2816                get_page(sg_page(&rsge));
2817                sk_msg_iter_var_next(new);
2818                msg->sg.data[new] = rsge;
2819        }
2820
2821        sk_msg_compute_data_pointers(msg);
2822        return 0;
2823}
2824
2825static const struct bpf_func_proto bpf_msg_push_data_proto = {
2826        .func           = bpf_msg_push_data,
2827        .gpl_only       = false,
2828        .ret_type       = RET_INTEGER,
2829        .arg1_type      = ARG_PTR_TO_CTX,
2830        .arg2_type      = ARG_ANYTHING,
2831        .arg3_type      = ARG_ANYTHING,
2832        .arg4_type      = ARG_ANYTHING,
2833};
2834
2835static void sk_msg_shift_left(struct sk_msg *msg, int i)
2836{
2837        int prev;
2838
2839        do {
2840                prev = i;
2841                sk_msg_iter_var_next(i);
2842                msg->sg.data[prev] = msg->sg.data[i];
2843        } while (i != msg->sg.end);
2844
2845        sk_msg_iter_prev(msg, end);
2846}
2847
2848static void sk_msg_shift_right(struct sk_msg *msg, int i)
2849{
2850        struct scatterlist tmp, sge;
2851
2852        sk_msg_iter_next(msg, end);
2853        sge = sk_msg_elem_cpy(msg, i);
2854        sk_msg_iter_var_next(i);
2855        tmp = sk_msg_elem_cpy(msg, i);
2856
2857        while (i != msg->sg.end) {
2858                msg->sg.data[i] = sge;
2859                sk_msg_iter_var_next(i);
2860                sge = tmp;
2861                tmp = sk_msg_elem_cpy(msg, i);
2862        }
2863}
2864
2865BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2866           u32, len, u64, flags)
2867{
2868        u32 i = 0, l = 0, space, offset = 0;
2869        u64 last = start + len;
2870        int pop;
2871
2872        if (unlikely(flags))
2873                return -EINVAL;
2874
2875        /* First find the starting scatterlist element */
2876        i = msg->sg.start;
2877        do {
2878                offset += l;
2879                l = sk_msg_elem(msg, i)->length;
2880
2881                if (start < offset + l)
2882                        break;
2883                sk_msg_iter_var_next(i);
2884        } while (i != msg->sg.end);
2885
2886        /* Bounds checks: start and pop must be inside message */
2887        if (start >= offset + l || last >= msg->sg.size)
2888                return -EINVAL;
2889
2890        space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2891
2892        pop = len;
2893        /* --------------| offset
2894         * -| start      |-------- len -------|
2895         *
2896         *  |----- a ----|-------- pop -------|----- b ----|
2897         *  |______________________________________________| length
2898         *
2899         *
2900         * a:   region at front of scatter element to save
2901         * b:   region at back of scatter element to save when length > A + pop
2902         * pop: region to pop from element, same as input 'pop' here will be
2903         *      decremented below per iteration.
2904         *
2905         * Two top-level cases to handle when start != offset, first B is non
2906         * zero and second B is zero corresponding to when a pop includes more
2907         * than one element.
2908         *
2909         * Then if B is non-zero AND there is no space allocate space and
2910         * compact A, B regions into page. If there is space shift ring to
2911         * the rigth free'ing the next element in ring to place B, leaving
2912         * A untouched except to reduce length.
2913         */
2914        if (start != offset) {
2915                struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2916                int a = start;
2917                int b = sge->length - pop - a;
2918
2919                sk_msg_iter_var_next(i);
2920
2921                if (pop < sge->length - a) {
2922                        if (space) {
2923                                sge->length = a;
2924                                sk_msg_shift_right(msg, i);
2925                                nsge = sk_msg_elem(msg, i);
2926                                get_page(sg_page(sge));
2927                                sg_set_page(nsge,
2928                                            sg_page(sge),
2929                                            b, sge->offset + pop + a);
2930                        } else {
2931                                struct page *page, *orig;
2932                                u8 *to, *from;
2933
2934                                page = alloc_pages(__GFP_NOWARN |
2935                                                   __GFP_COMP   | GFP_ATOMIC,
2936                                                   get_order(a + b));
2937                                if (unlikely(!page))
2938                                        return -ENOMEM;
2939
2940                                sge->length = a;
2941                                orig = sg_page(sge);
2942                                from = sg_virt(sge);
2943                                to = page_address(page);
2944                                memcpy(to, from, a);
2945                                memcpy(to + a, from + a + pop, b);
2946                                sg_set_page(sge, page, a + b, 0);
2947                                put_page(orig);
2948                        }
2949                        pop = 0;
2950                } else if (pop >= sge->length - a) {
2951                        pop -= (sge->length - a);
2952                        sge->length = a;
2953                }
2954        }
2955
2956        /* From above the current layout _must_ be as follows,
2957         *
2958         * -| offset
2959         * -| start
2960         *
2961         *  |---- pop ---|---------------- b ------------|
2962         *  |____________________________________________| length
2963         *
2964         * Offset and start of the current msg elem are equal because in the
2965         * previous case we handled offset != start and either consumed the
2966         * entire element and advanced to the next element OR pop == 0.
2967         *
2968         * Two cases to handle here are first pop is less than the length
2969         * leaving some remainder b above. Simply adjust the element's layout
2970         * in this case. Or pop >= length of the element so that b = 0. In this
2971         * case advance to next element decrementing pop.
2972         */
2973        while (pop) {
2974                struct scatterlist *sge = sk_msg_elem(msg, i);
2975
2976                if (pop < sge->length) {
2977                        sge->length -= pop;
2978                        sge->offset += pop;
2979                        pop = 0;
2980                } else {
2981                        pop -= sge->length;
2982                        sk_msg_shift_left(msg, i);
2983                }
2984                sk_msg_iter_var_next(i);
2985        }
2986
2987        sk_mem_uncharge(msg->sk, len - pop);
2988        msg->sg.size -= (len - pop);
2989        sk_msg_compute_data_pointers(msg);
2990        return 0;
2991}
2992
2993static const struct bpf_func_proto bpf_msg_pop_data_proto = {
2994        .func           = bpf_msg_pop_data,
2995        .gpl_only       = false,
2996        .ret_type       = RET_INTEGER,
2997        .arg1_type      = ARG_PTR_TO_CTX,
2998        .arg2_type      = ARG_ANYTHING,
2999        .arg3_type      = ARG_ANYTHING,
3000        .arg4_type      = ARG_ANYTHING,
3001};
3002
3003#ifdef CONFIG_CGROUP_NET_CLASSID
3004BPF_CALL_0(bpf_get_cgroup_classid_curr)
3005{
3006        return __task_get_classid(current);
3007}
3008
3009static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
3010        .func           = bpf_get_cgroup_classid_curr,
3011        .gpl_only       = false,
3012        .ret_type       = RET_INTEGER,
3013};
3014
3015BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
3016{
3017        struct sock *sk = skb_to_full_sk(skb);
3018
3019        if (!sk || !sk_fullsock(sk))
3020                return 0;
3021
3022        return sock_cgroup_classid(&sk->sk_cgrp_data);
3023}
3024
3025static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
3026        .func           = bpf_skb_cgroup_classid,
3027        .gpl_only       = false,
3028        .ret_type       = RET_INTEGER,
3029        .arg1_type      = ARG_PTR_TO_CTX,
3030};
3031#endif
3032
3033BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
3034{
3035        return task_get_classid(skb);
3036}
3037
3038static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
3039        .func           = bpf_get_cgroup_classid,
3040        .gpl_only       = false,
3041        .ret_type       = RET_INTEGER,
3042        .arg1_type      = ARG_PTR_TO_CTX,
3043};
3044
3045BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
3046{
3047        return dst_tclassid(skb);
3048}
3049
3050static const struct bpf_func_proto bpf_get_route_realm_proto = {
3051        .func           = bpf_get_route_realm,
3052        .gpl_only       = false,
3053        .ret_type       = RET_INTEGER,
3054        .arg1_type      = ARG_PTR_TO_CTX,
3055};
3056
3057BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
3058{
3059        /* If skb_clear_hash() was called due to mangling, we can
3060         * trigger SW recalculation here. Later access to hash
3061         * can then use the inline skb->hash via context directly
3062         * instead of calling this helper again.
3063         */
3064        return skb_get_hash(skb);
3065}
3066
3067static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
3068        .func           = bpf_get_hash_recalc,
3069        .gpl_only       = false,
3070        .ret_type       = RET_INTEGER,
3071        .arg1_type      = ARG_PTR_TO_CTX,
3072};
3073
3074BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
3075{
3076        /* After all direct packet write, this can be used once for
3077         * triggering a lazy recalc on next skb_get_hash() invocation.
3078         */
3079        skb_clear_hash(skb);
3080        return 0;
3081}
3082
3083static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
3084        .func           = bpf_set_hash_invalid,
3085        .gpl_only       = false,
3086        .ret_type       = RET_INTEGER,
3087        .arg1_type      = ARG_PTR_TO_CTX,
3088};
3089
3090BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
3091{
3092        /* Set user specified hash as L4(+), so that it gets returned
3093         * on skb_get_hash() call unless BPF prog later on triggers a
3094         * skb_clear_hash().
3095         */
3096        __skb_set_sw_hash(skb, hash, true);
3097        return 0;
3098}
3099
3100static const struct bpf_func_proto bpf_set_hash_proto = {
3101        .func           = bpf_set_hash,
3102        .gpl_only       = false,
3103        .ret_type       = RET_INTEGER,
3104        .arg1_type      = ARG_PTR_TO_CTX,
3105        .arg2_type      = ARG_ANYTHING,
3106};
3107
3108BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
3109           u16, vlan_tci)
3110{
3111        int ret;
3112
3113        if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
3114                     vlan_proto != htons(ETH_P_8021AD)))
3115                vlan_proto = htons(ETH_P_8021Q);
3116
3117        bpf_push_mac_rcsum(skb);
3118        ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
3119        bpf_pull_mac_rcsum(skb);
3120
3121        bpf_compute_data_pointers(skb);
3122        return ret;
3123}
3124
3125static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
3126        .func           = bpf_skb_vlan_push,
3127        .gpl_only       = false,
3128        .ret_type       = RET_INTEGER,
3129        .arg1_type      = ARG_PTR_TO_CTX,
3130        .arg2_type      = ARG_ANYTHING,
3131        .arg3_type      = ARG_ANYTHING,
3132};
3133
3134BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
3135{
3136        int ret;
3137
3138        bpf_push_mac_rcsum(skb);
3139        ret = skb_vlan_pop(skb);
3140        bpf_pull_mac_rcsum(skb);
3141
3142        bpf_compute_data_pointers(skb);
3143        return ret;
3144}
3145
3146static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
3147        .func           = bpf_skb_vlan_pop,
3148        .gpl_only       = false,
3149        .ret_type       = RET_INTEGER,
3150        .arg1_type      = ARG_PTR_TO_CTX,
3151};
3152
3153static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
3154{
3155        /* Caller already did skb_cow() with len as headroom,
3156         * so no need to do it here.
3157         */
3158        skb_push(skb, len);
3159        memmove(skb->data, skb->data + len, off);
3160        memset(skb->data + off, 0, len);
3161
3162        /* No skb_postpush_rcsum(skb, skb->data + off, len)
3163         * needed here as it does not change the skb->csum
3164         * result for checksum complete when summing over
3165         * zeroed blocks.
3166         */
3167        return 0;
3168}
3169
3170static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
3171{
3172        /* skb_ensure_writable() is not needed here, as we're
3173         * already working on an uncloned skb.
3174         */
3175        if (unlikely(!pskb_may_pull(skb, off + len)))
3176                return -ENOMEM;
3177
3178        skb_postpull_rcsum(skb, skb->data + off, len);
3179        memmove(skb->data + len, skb->data, off);
3180        __skb_pull(skb, len);
3181
3182        return 0;
3183}
3184
3185static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
3186{
3187        bool trans_same = skb->transport_header == skb->network_header;
3188        int ret;
3189
3190        /* There's no need for __skb_push()/__skb_pull() pair to
3191         * get to the start of the mac header as we're guaranteed
3192         * to always start from here under eBPF.
3193         */
3194        ret = bpf_skb_generic_push(skb, off, len);
3195        if (likely(!ret)) {
3196                skb->mac_header -= len;
3197                skb->network_header -= len;
3198                if (trans_same)
3199                        skb->transport_header = skb->network_header;
3200        }
3201
3202        return ret;
3203}
3204
3205static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
3206{
3207        bool trans_same = skb->transport_header == skb->network_header;
3208        int ret;
3209
3210        /* Same here, __skb_push()/__skb_pull() pair not needed. */
3211        ret = bpf_skb_generic_pop(skb, off, len);
3212        if (likely(!ret)) {
3213                skb->mac_header += len;
3214                skb->network_header += len;
3215                if (trans_same)
3216                        skb->transport_header = skb->network_header;
3217        }
3218
3219        return ret;
3220}
3221
3222static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
3223{
3224        const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3225        u32 off = skb_mac_header_len(skb);
3226        int ret;
3227
3228        ret = skb_cow(skb, len_diff);
3229        if (unlikely(ret < 0))
3230                return ret;
3231
3232        ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3233        if (unlikely(ret < 0))
3234                return ret;
3235
3236        if (skb_is_gso(skb)) {
3237                struct skb_shared_info *shinfo = skb_shinfo(skb);
3238
3239                /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
3240                if (shinfo->gso_type & SKB_GSO_TCPV4) {
3241                        shinfo->gso_type &= ~SKB_GSO_TCPV4;
3242                        shinfo->gso_type |=  SKB_GSO_TCPV6;
3243                }
3244        }
3245
3246        skb->protocol = htons(ETH_P_IPV6);
3247        skb_clear_hash(skb);
3248
3249        return 0;
3250}
3251
3252static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
3253{
3254        const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3255        u32 off = skb_mac_header_len(skb);
3256        int ret;
3257
3258        ret = skb_unclone(skb, GFP_ATOMIC);
3259        if (unlikely(ret < 0))
3260                return ret;
3261
3262        ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3263        if (unlikely(ret < 0))
3264                return ret;
3265
3266        if (skb_is_gso(skb)) {
3267                struct skb_shared_info *shinfo = skb_shinfo(skb);
3268
3269                /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
3270                if (shinfo->gso_type & SKB_GSO_TCPV6) {
3271                        shinfo->gso_type &= ~SKB_GSO_TCPV6;
3272                        shinfo->gso_type |=  SKB_GSO_TCPV4;
3273                }
3274        }
3275
3276        skb->protocol = htons(ETH_P_IP);
3277        skb_clear_hash(skb);
3278
3279        return 0;
3280}
3281
3282static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
3283{
3284        __be16 from_proto = skb->protocol;
3285
3286        if (from_proto == htons(ETH_P_IP) &&
3287              to_proto == htons(ETH_P_IPV6))
3288                return bpf_skb_proto_4_to_6(skb);
3289
3290        if (from_proto == htons(ETH_P_IPV6) &&
3291              to_proto == htons(ETH_P_IP))
3292                return bpf_skb_proto_6_to_4(skb);
3293
3294        return -ENOTSUPP;
3295}
3296
3297BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
3298           u64, flags)
3299{
3300        int ret;
3301
3302        if (unlikely(flags))
3303                return -EINVAL;
3304
3305        /* General idea is that this helper does the basic groundwork
3306         * needed for changing the protocol, and eBPF program fills the
3307         * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
3308         * and other helpers, rather than passing a raw buffer here.
3309         *
3310         * The rationale is to keep this minimal and without a need to
3311         * deal with raw packet data. F.e. even if we would pass buffers
3312         * here, the program still needs to call the bpf_lX_csum_replace()
3313         * helpers anyway. Plus, this way we keep also separation of
3314         * concerns, since f.e. bpf_skb_store_bytes() should only take
3315         * care of stores.
3316         *
3317         * Currently, additional options and extension header space are
3318         * not supported, but flags register is reserved so we can adapt
3319         * that. For offloads, we mark packet as dodgy, so that headers
3320         * need to be verified first.
3321         */
3322        ret = bpf_skb_proto_xlat(skb, proto);
3323        bpf_compute_data_pointers(skb);
3324        return ret;
3325}
3326
3327static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3328        .func           = bpf_skb_change_proto,
3329        .gpl_only       = false,
3330        .ret_type       = RET_INTEGER,
3331        .arg1_type      = ARG_PTR_TO_CTX,
3332        .arg2_type      = ARG_ANYTHING,
3333        .arg3_type      = ARG_ANYTHING,
3334};
3335
3336BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
3337{
3338        /* We only allow a restricted subset to be changed for now. */
3339        if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
3340                     !skb_pkt_type_ok(pkt_type)))
3341                return -EINVAL;
3342
3343        skb->pkt_type = pkt_type;
3344        return 0;
3345}
3346
3347static const struct bpf_func_proto bpf_skb_change_type_proto = {
3348        .func           = bpf_skb_change_type,
3349        .gpl_only       = false,
3350        .ret_type       = RET_INTEGER,
3351        .arg1_type      = ARG_PTR_TO_CTX,
3352        .arg2_type      = ARG_ANYTHING,
3353};
3354
3355static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3356{
3357        switch (skb->protocol) {
3358        case htons(ETH_P_IP):
3359                return sizeof(struct iphdr);
3360        case htons(ETH_P_IPV6):
3361                return sizeof(struct ipv6hdr);
3362        default:
3363                return ~0U;
3364        }
3365}
3366
3367#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK    (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
3368                                         BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3369
3370#define BPF_F_ADJ_ROOM_MASK             (BPF_F_ADJ_ROOM_FIXED_GSO | \
3371                                         BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
3372                                         BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
3373                                         BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
3374                                         BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
3375                                         BPF_F_ADJ_ROOM_ENCAP_L2( \
3376                                          BPF_ADJ_ROOM_ENCAP_L2_MASK))
3377
3378static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3379                            u64 flags)
3380{
3381        u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
3382        bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
3383        u16 mac_len = 0, inner_net = 0, inner_trans = 0;
3384        unsigned int gso_type = SKB_GSO_DODGY;
3385        int ret;
3386
3387        if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3388                /* udp gso_size delineates datagrams, only allow if fixed */
3389                if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3390                    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3391                        return -ENOTSUPP;
3392        }
3393
3394        ret = skb_cow_head(skb, len_diff);
3395        if (unlikely(ret < 0))
3396                return ret;
3397
3398        if (encap) {
3399                if (skb->protocol != htons(ETH_P_IP) &&
3400                    skb->protocol != htons(ETH_P_IPV6))
3401                        return -ENOTSUPP;
3402
3403                if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3404                    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3405                        return -EINVAL;
3406
3407                if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3408                    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3409                        return -EINVAL;
3410
3411                if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
3412                    inner_mac_len < ETH_HLEN)
3413                        return -EINVAL;
3414
3415                if (skb->encapsulation)
3416                        return -EALREADY;
3417
3418                mac_len = skb->network_header - skb->mac_header;
3419                inner_net = skb->network_header;
3420                if (inner_mac_len > len_diff)
3421                        return -EINVAL;
3422                inner_trans = skb->transport_header;
3423        }
3424
3425        ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3426        if (unlikely(ret < 0))
3427                return ret;
3428
3429        if (encap) {
3430                skb->inner_mac_header = inner_net - inner_mac_len;
3431                skb->inner_network_header = inner_net;
3432                skb->inner_transport_header = inner_trans;
3433
3434                if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
3435                        skb_set_inner_protocol(skb, htons(ETH_P_TEB));
3436                else
3437                        skb_set_inner_protocol(skb, skb->protocol);
3438
3439                skb->encapsulation = 1;
3440                skb_set_network_header(skb, mac_len);
3441
3442                if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3443                        gso_type |= SKB_GSO_UDP_TUNNEL;
3444                else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3445                        gso_type |= SKB_GSO_GRE;
3446                else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3447                        gso_type |= SKB_GSO_IPXIP6;
3448                else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3449                        gso_type |= SKB_GSO_IPXIP4;
3450
3451                if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
3452                    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3453                        int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3454                                        sizeof(struct ipv6hdr) :
3455                                        sizeof(struct iphdr);
3456
3457                        skb_set_transport_header(skb, mac_len + nh_len);
3458                }
3459
3460                /* Match skb->protocol to new outer l3 protocol */
3461                if (skb->protocol == htons(ETH_P_IP) &&
3462                    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3463                        skb->protocol = htons(ETH_P_IPV6);
3464                else if (skb->protocol == htons(ETH_P_IPV6) &&
3465                         flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3466                        skb->protocol = htons(ETH_P_IP);
3467        }
3468
3469        if (skb_is_gso(skb)) {
3470                struct skb_shared_info *shinfo = skb_shinfo(skb);
3471
3472                /* Due to header grow, MSS needs to be downgraded. */
3473                if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3474                        skb_decrease_gso_size(shinfo, len_diff);
3475
3476                /* Header must be checked, and gso_segs recomputed. */
3477                shinfo->gso_type |= gso_type;
3478                shinfo->gso_segs = 0;
3479        }
3480
3481        return 0;
3482}
3483
3484static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3485                              u64 flags)
3486{
3487        int ret;
3488
3489        if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
3490                               BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3491                return -EINVAL;
3492
3493        if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3494                /* udp gso_size delineates datagrams, only allow if fixed */
3495                if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3496                    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3497                        return -ENOTSUPP;
3498        }
3499
3500        ret = skb_unclone(skb, GFP_ATOMIC);
3501        if (unlikely(ret < 0))
3502                return ret;
3503
3504        ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3505        if (unlikely(ret < 0))
3506                return ret;
3507
3508        if (skb_is_gso(skb)) {
3509                struct skb_shared_info *shinfo = skb_shinfo(skb);
3510
3511                /* Due to header shrink, MSS can be upgraded. */
3512                if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3513                        skb_increase_gso_size(shinfo, len_diff);
3514
3515                /* Header must be checked, and gso_segs recomputed. */
3516                shinfo->gso_type |= SKB_GSO_DODGY;
3517                shinfo->gso_segs = 0;
3518        }
3519
3520        return 0;
3521}
3522
3523#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
3524
3525BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3526           u32, mode, u64, flags)
3527{
3528        u32 len_diff_abs = abs(len_diff);
3529        bool shrink = len_diff < 0;
3530        int ret = 0;
3531
3532        if (unlikely(flags || mode))
3533                return -EINVAL;
3534        if (unlikely(len_diff_abs > 0xfffU))
3535                return -EFAULT;
3536
3537        if (!shrink) {
3538                ret = skb_cow(skb, len_diff);
3539                if (unlikely(ret < 0))
3540                        return ret;
3541                __skb_push(skb, len_diff_abs);
3542                memset(skb->data, 0, len_diff_abs);
3543        } else {
3544                if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
3545                        return -ENOMEM;
3546                __skb_pull(skb, len_diff_abs);
3547        }
3548        if (tls_sw_has_ctx_rx(skb->sk)) {
3549                struct strp_msg *rxm = strp_msg(skb);
3550
3551                rxm->full_len += len_diff;
3552        }
3553        return ret;
3554}
3555
3556static const struct bpf_func_proto sk_skb_adjust_room_proto = {
3557        .func           = sk_skb_adjust_room,
3558        .gpl_only       = false,
3559        .ret_type       = RET_INTEGER,
3560        .arg1_type      = ARG_PTR_TO_CTX,
3561        .arg2_type      = ARG_ANYTHING,
3562        .arg3_type      = ARG_ANYTHING,
3563        .arg4_type      = ARG_ANYTHING,
3564};
3565
3566BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3567           u32, mode, u64, flags)
3568{
3569        u32 len_cur, len_diff_abs = abs(len_diff);
3570        u32 len_min = bpf_skb_net_base_len(skb);
3571        u32 len_max = BPF_SKB_MAX_LEN;
3572        __be16 proto = skb->protocol;
3573        bool shrink = len_diff < 0;
3574        u32 off;
3575        int ret;
3576
3577        if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
3578                               BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3579                return -EINVAL;
3580        if (unlikely(len_diff_abs > 0xfffU))
3581                return -EFAULT;
3582        if (unlikely(proto != htons(ETH_P_IP) &&
3583                     proto != htons(ETH_P_IPV6)))
3584                return -ENOTSUPP;
3585
3586        off = skb_mac_header_len(skb);
3587        switch (mode) {
3588        case BPF_ADJ_ROOM_NET:
3589                off += bpf_skb_net_base_len(skb);
3590                break;
3591        case BPF_ADJ_ROOM_MAC:
3592                break;
3593        default:
3594                return -ENOTSUPP;
3595        }
3596
3597        len_cur = skb->len - skb_network_offset(skb);
3598        if ((shrink && (len_diff_abs >= len_cur ||
3599                        len_cur - len_diff_abs < len_min)) ||
3600            (!shrink && (skb->len + len_diff_abs > len_max &&
3601                         !skb_is_gso(skb))))
3602                return -ENOTSUPP;
3603
3604        ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3605                       bpf_skb_net_grow(skb, off, len_diff_abs, flags);
3606        if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3607                __skb_reset_checksum_unnecessary(skb);
3608
3609        bpf_compute_data_pointers(skb);
3610        return ret;
3611}
3612
3613static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3614        .func           = bpf_skb_adjust_room,
3615        .gpl_only       = false,
3616        .ret_type       = RET_INTEGER,
3617        .arg1_type      = ARG_PTR_TO_CTX,
3618        .arg2_type      = ARG_ANYTHING,
3619        .arg3_type      = ARG_ANYTHING,
3620        .arg4_type      = ARG_ANYTHING,
3621};
3622
3623static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3624{
3625        u32 min_len = skb_network_offset(skb);
3626
3627        if (skb_transport_header_was_set(skb))
3628                min_len = skb_transport_offset(skb);
3629        if (skb->ip_summed == CHECKSUM_PARTIAL)
3630                min_len = skb_checksum_start_offset(skb) +
3631                          skb->csum_offset + sizeof(__sum16);
3632        return min_len;
3633}
3634
3635static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3636{
3637        unsigned int old_len = skb->len;
3638        int ret;
3639
3640        ret = __skb_grow_rcsum(skb, new_len);
3641        if (!ret)
3642                memset(skb->data + old_len, 0, new_len - old_len);
3643        return ret;
3644}
3645
3646static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3647{
3648        return __skb_trim_rcsum(skb, new_len);
3649}
3650
3651static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3652                                        u64 flags)
3653{
3654        u32 max_len = BPF_SKB_MAX_LEN;
3655        u32 min_len = __bpf_skb_min_len(skb);
3656        int ret;
3657
3658        if (unlikely(flags || new_len > max_len || new_len < min_len))
3659                return -EINVAL;
3660        if (skb->encapsulation)
3661                return -ENOTSUPP;
3662
3663        /* The basic idea of this helper is that it's performing the
3664         * needed work to either grow or trim an skb, and eBPF program
3665         * rewrites the rest via helpers like bpf_skb_store_bytes(),
3666         * bpf_lX_csum_replace() and others rather than passing a raw
3667         * buffer here. This one is a slow path helper and intended
3668         * for replies with control messages.
3669         *
3670         * Like in bpf_skb_change_proto(), we want to keep this rather
3671         * minimal and without protocol specifics so that we are able
3672         * to separate concerns as in bpf_skb_store_bytes() should only
3673         * be the one responsible for writing buffers.
3674         *
3675         * It's really expected to be a slow path operation here for
3676         * control message replies, so we're implicitly linearizing,
3677         * uncloning and drop offloads from the skb by this.
3678         */
3679        ret = __bpf_try_make_writable(skb, skb->len);
3680        if (!ret) {
3681                if (new_len > skb->len)
3682                        ret = bpf_skb_grow_rcsum(skb, new_len);
3683                else if (new_len < skb->len)
3684                        ret = bpf_skb_trim_rcsum(skb, new_len);
3685                if (!ret && skb_is_gso(skb))
3686                        skb_gso_reset(skb);
3687        }
3688        return ret;
3689}
3690
3691BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3692           u64, flags)
3693{
3694        int ret = __bpf_skb_change_tail(skb, new_len, flags);
3695
3696        bpf_compute_data_pointers(skb);
3697        return ret;
3698}
3699
3700static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3701        .func           = bpf_skb_change_tail,
3702        .gpl_only       = false,
3703        .ret_type       = RET_INTEGER,
3704        .arg1_type      = ARG_PTR_TO_CTX,
3705        .arg2_type      = ARG_ANYTHING,
3706        .arg3_type      = ARG_ANYTHING,
3707};
3708
3709BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3710           u64, flags)
3711{
3712        return __bpf_skb_change_tail(skb, new_len, flags);
3713}
3714
3715static const struct bpf_func_proto sk_skb_change_tail_proto = {
3716        .func           = sk_skb_change_tail,
3717        .gpl_only       = false,
3718        .ret_type       = RET_INTEGER,
3719        .arg1_type      = ARG_PTR_TO_CTX,
3720        .arg2_type      = ARG_ANYTHING,
3721        .arg3_type      = ARG_ANYTHING,
3722};
3723
3724static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3725                                        u64 flags)
3726{
3727        u32 max_len = BPF_SKB_MAX_LEN;
3728        u32 new_len = skb->len + head_room;
3729        int ret;
3730
3731        if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3732                     new_len < skb->len))
3733                return -EINVAL;
3734
3735        ret = skb_cow(skb, head_room);
3736        if (likely(!ret)) {
3737                /* Idea for this helper is that we currently only
3738                 * allow to expand on mac header. This means that
3739                 * skb->protocol network header, etc, stay as is.
3740                 * Compared to bpf_skb_change_tail(), we're more
3741                 * flexible due to not needing to linearize or
3742                 * reset GSO. Intention for this helper is to be
3743                 * used by an L3 skb that needs to push mac header
3744                 * for redirection into L2 device.
3745                 */
3746                __skb_push(skb, head_room);
3747                memset(skb->data, 0, head_room);
3748                skb_reset_mac_header(skb);
3749                skb_reset_mac_len(skb);
3750        }
3751
3752        return ret;
3753}
3754
3755BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3756           u64, flags)
3757{
3758        int ret = __bpf_skb_change_head(skb, head_room, flags);
3759
3760        bpf_compute_data_pointers(skb);
3761        return ret;
3762}
3763
3764static const struct bpf_func_proto bpf_skb_change_head_proto = {
3765        .func           = bpf_skb_change_head,
3766        .gpl_only       = false,
3767        .ret_type       = RET_INTEGER,
3768        .arg1_type      = ARG_PTR_TO_CTX,
3769        .arg2_type      = ARG_ANYTHING,
3770        .arg3_type      = ARG_ANYTHING,
3771};
3772
3773BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3774           u64, flags)
3775{
3776        return __bpf_skb_change_head(skb, head_room, flags);
3777}
3778
3779static const struct bpf_func_proto sk_skb_change_head_proto = {
3780        .func           = sk_skb_change_head,
3781        .gpl_only       = false,
3782        .ret_type       = RET_INTEGER,
3783        .arg1_type      = ARG_PTR_TO_CTX,
3784        .arg2_type      = ARG_ANYTHING,
3785        .arg3_type      = ARG_ANYTHING,
3786};
3787static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3788{
3789        return xdp_data_meta_unsupported(xdp) ? 0 :
3790               xdp->data - xdp->data_meta;
3791}
3792
3793BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3794{
3795        void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3796        unsigned long metalen = xdp_get_metalen(xdp);
3797        void *data_start = xdp_frame_end + metalen;
3798        void *data = xdp->data + offset;
3799
3800        if (unlikely(data < data_start ||
3801                     data > xdp->data_end - ETH_HLEN))
3802                return -EINVAL;
3803
3804        if (metalen)
3805                memmove(xdp->data_meta + offset,
3806                        xdp->data_meta, metalen);
3807        xdp->data_meta += offset;
3808        xdp->data = data;
3809
3810        return 0;
3811}
3812
3813static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3814        .func           = bpf_xdp_adjust_head,
3815        .gpl_only       = false,
3816        .ret_type       = RET_INTEGER,
3817        .arg1_type      = ARG_PTR_TO_CTX,
3818        .arg2_type      = ARG_ANYTHING,
3819};
3820
3821BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3822{
3823        void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
3824        void *data_end = xdp->data_end + offset;
3825
3826        /* Notice that xdp_data_hard_end have reserved some tailroom */
3827        if (unlikely(data_end > data_hard_end))
3828                return -EINVAL;
3829
3830        /* ALL drivers MUST init xdp->frame_sz, chicken check below */
3831        if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
3832                WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
3833                return -EINVAL;
3834        }
3835
3836        if (unlikely(data_end < xdp->data + ETH_HLEN))
3837                return -EINVAL;
3838
3839        /* Clear memory area on grow, can contain uninit kernel memory */
3840        if (offset > 0)
3841                memset(xdp->data_end, 0, offset);
3842
3843        xdp->data_end = data_end;
3844
3845        return 0;
3846}
3847
3848static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3849        .func           = bpf_xdp_adjust_tail,
3850        .gpl_only       = false,
3851        .ret_type       = RET_INTEGER,
3852        .arg1_type      = ARG_PTR_TO_CTX,
3853        .arg2_type      = ARG_ANYTHING,
3854};
3855
3856BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3857{
3858        void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3859        void *meta = xdp->data_meta + offset;
3860        unsigned long metalen = xdp->data - meta;
3861
3862        if (xdp_data_meta_unsupported(xdp))
3863                return -ENOTSUPP;
3864        if (unlikely(meta < xdp_frame_end ||
3865                     meta > xdp->data))
3866                return -EINVAL;
3867        if (unlikely(xdp_metalen_invalid(metalen)))
3868                return -EACCES;
3869
3870        xdp->data_meta = meta;
3871
3872        return 0;
3873}
3874
3875static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3876        .func           = bpf_xdp_adjust_meta,
3877        .gpl_only       = false,
3878        .ret_type       = RET_INTEGER,
3879        .arg1_type      = ARG_PTR_TO_CTX,
3880        .arg2_type      = ARG_ANYTHING,
3881};
3882
3883/* XDP_REDIRECT works by a three-step process, implemented in the functions
3884 * below:
3885 *
3886 * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
3887 *    of the redirect and store it (along with some other metadata) in a per-CPU
3888 *    struct bpf_redirect_info.
3889 *
3890 * 2. When the program returns the XDP_REDIRECT return code, the driver will
3891 *    call xdp_do_redirect() which will use the information in struct
3892 *    bpf_redirect_info to actually enqueue the frame into a map type-specific
3893 *    bulk queue structure.
3894 *
3895 * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
3896 *    which will flush all the different bulk queues, thus completing the
3897 *    redirect.
3898 *
3899 * Pointers to the map entries will be kept around for this whole sequence of
3900 * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
3901 * the core code; instead, the RCU protection relies on everything happening
3902 * inside a single NAPI poll sequence, which means it's between a pair of calls
3903 * to local_bh_disable()/local_bh_enable().
3904 *
3905 * The map entries are marked as __rcu and the map code makes sure to
3906 * dereference those pointers with rcu_dereference_check() in a way that works
3907 * for both sections that to hold an rcu_read_lock() and sections that are
3908 * called from NAPI without a separate rcu_read_lock(). The code below does not
3909 * use RCU annotations, but relies on those in the map code.
3910 */
3911void xdp_do_flush(void)
3912{
3913        __dev_flush();
3914        __cpu_map_flush();
3915        __xsk_map_flush();
3916}
3917EXPORT_SYMBOL_GPL(xdp_do_flush);
3918
3919void bpf_clear_redirect_map(struct bpf_map *map)
3920{
3921        struct bpf_redirect_info *ri;
3922        int cpu;
3923
3924        for_each_possible_cpu(cpu) {
3925                ri = per_cpu_ptr(&bpf_redirect_info, cpu);
3926                /* Avoid polluting remote cacheline due to writes if
3927                 * not needed. Once we pass this test, we need the
3928                 * cmpxchg() to make sure it hasn't been changed in
3929                 * the meantime by remote CPU.
3930                 */
3931                if (unlikely(READ_ONCE(ri->map) == map))
3932                        cmpxchg(&ri->map, map, NULL);
3933        }
3934}
3935
3936DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
3937EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
3938
3939u32 xdp_master_redirect(struct xdp_buff *xdp)
3940{
3941        struct net_device *master, *slave;
3942        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3943
3944        master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
3945        slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
3946        if (slave && slave != xdp->rxq->dev) {
3947                /* The target device is different from the receiving device, so
3948                 * redirect it to the new device.
3949                 * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
3950                 * drivers to unmap the packet from their rx ring.
3951                 */
3952                ri->tgt_index = slave->ifindex;
3953                ri->map_id = INT_MAX;
3954                ri->map_type = BPF_MAP_TYPE_UNSPEC;
3955                return XDP_REDIRECT;
3956        }
3957        return XDP_TX;
3958}
3959EXPORT_SYMBOL_GPL(xdp_master_redirect);
3960
3961int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3962                    struct bpf_prog *xdp_prog)
3963{
3964        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3965        enum bpf_map_type map_type = ri->map_type;
3966        void *fwd = ri->tgt_value;
3967        u32 map_id = ri->map_id;
3968        struct bpf_map *map;
3969        int err;
3970
3971        ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
3972        ri->map_type = BPF_MAP_TYPE_UNSPEC;
3973
3974        switch (map_type) {
3975        case BPF_MAP_TYPE_DEVMAP:
3976                fallthrough;
3977        case BPF_MAP_TYPE_DEVMAP_HASH:
3978                map = READ_ONCE(ri->map);
3979                if (unlikely(map)) {
3980                        WRITE_ONCE(ri->map, NULL);
3981                        err = dev_map_enqueue_multi(xdp, dev, map,
3982                                                    ri->flags & BPF_F_EXCLUDE_INGRESS);
3983                } else {
3984                        err = dev_map_enqueue(fwd, xdp, dev);
3985                }
3986                break;
3987        case BPF_MAP_TYPE_CPUMAP:
3988                err = cpu_map_enqueue(fwd, xdp, dev);
3989                break;
3990        case BPF_MAP_TYPE_XSKMAP:
3991                err = __xsk_map_redirect(fwd, xdp);
3992                break;
3993        case BPF_MAP_TYPE_UNSPEC:
3994                if (map_id == INT_MAX) {
3995                        fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
3996                        if (unlikely(!fwd)) {
3997                                err = -EINVAL;
3998                                break;
3999                        }
4000                        err = dev_xdp_enqueue(fwd, xdp, dev);
4001                        break;
4002                }
4003                fallthrough;
4004        default:
4005                err = -EBADRQC;
4006        }
4007
4008        if (unlikely(err))
4009                goto err;
4010
4011        _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4012        return 0;
4013err:
4014        _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4015        return err;
4016}
4017EXPORT_SYMBOL_GPL(xdp_do_redirect);
4018
4019static int xdp_do_generic_redirect_map(struct net_device *dev,
4020                                       struct sk_buff *skb,
4021                                       struct xdp_buff *xdp,
4022                                       struct bpf_prog *xdp_prog,
4023                                       void *fwd,
4024                                       enum bpf_map_type map_type, u32 map_id)
4025{
4026        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4027        struct bpf_map *map;
4028        int err;
4029
4030        switch (map_type) {
4031        case BPF_MAP_TYPE_DEVMAP:
4032                fallthrough;
4033        case BPF_MAP_TYPE_DEVMAP_HASH:
4034                map = READ_ONCE(ri->map);
4035                if (unlikely(map)) {
4036                        WRITE_ONCE(ri->map, NULL);
4037                        err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
4038                                                     ri->flags & BPF_F_EXCLUDE_INGRESS);
4039                } else {
4040                        err = dev_map_generic_redirect(fwd, skb, xdp_prog);
4041                }
4042                if (unlikely(err))
4043                        goto err;
4044                break;
4045        case BPF_MAP_TYPE_XSKMAP:
4046                err = xsk_generic_rcv(fwd, xdp);
4047                if (err)
4048                        goto err;
4049                consume_skb(skb);
4050                break;
4051        case BPF_MAP_TYPE_CPUMAP:
4052                err = cpu_map_generic_redirect(fwd, skb);
4053                if (unlikely(err))
4054                        goto err;
4055                break;
4056        default:
4057                err = -EBADRQC;
4058                goto err;
4059        }
4060
4061        _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4062        return 0;
4063err:
4064        _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4065        return err;
4066}
4067
4068int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
4069                            struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
4070{
4071        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4072        enum bpf_map_type map_type = ri->map_type;
4073        void *fwd = ri->tgt_value;
4074        u32 map_id = ri->map_id;
4075        int err;
4076
4077        ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
4078        ri->map_type = BPF_MAP_TYPE_UNSPEC;
4079
4080        if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4081                fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4082                if (unlikely(!fwd)) {
4083                        err = -EINVAL;
4084                        goto err;
4085                }
4086
4087                err = xdp_ok_fwd_dev(fwd, skb->len);
4088                if (unlikely(err))
4089                        goto err;
4090
4091                skb->dev = fwd;
4092                _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4093                generic_xdp_tx(skb, xdp_prog);
4094                return 0;
4095        }
4096
4097        return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
4098err:
4099        _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
4100        return err;
4101}
4102
4103BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
4104{
4105        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4106
4107        if (unlikely(flags))
4108                return XDP_ABORTED;
4109
4110        /* NB! Map type UNSPEC and map_id == INT_MAX (never generated
4111         * by map_idr) is used for ifindex based XDP redirect.
4112         */
4113        ri->tgt_index = ifindex;
4114        ri->map_id = INT_MAX;
4115        ri->map_type = BPF_MAP_TYPE_UNSPEC;
4116
4117        return XDP_REDIRECT;
4118}
4119
4120static const struct bpf_func_proto bpf_xdp_redirect_proto = {
4121        .func           = bpf_xdp_redirect,
4122        .gpl_only       = false,
4123        .ret_type       = RET_INTEGER,
4124        .arg1_type      = ARG_ANYTHING,
4125        .arg2_type      = ARG_ANYTHING,
4126};
4127
4128BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
4129           u64, flags)
4130{
4131        return map->ops->map_redirect(map, ifindex, flags);
4132}
4133
4134static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
4135        .func           = bpf_xdp_redirect_map,
4136        .gpl_only       = false,
4137        .ret_type       = RET_INTEGER,
4138        .arg1_type      = ARG_CONST_MAP_PTR,
4139        .arg2_type      = ARG_ANYTHING,
4140        .arg3_type      = ARG_ANYTHING,
4141};
4142
4143static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
4144                                  unsigned long off, unsigned long len)
4145{
4146        void *ptr = skb_header_pointer(skb, off, len, dst_buff);
4147
4148        if (unlikely(!ptr))
4149                return len;
4150        if (ptr != dst_buff)
4151                memcpy(dst_buff, ptr, len);
4152
4153        return 0;
4154}
4155
4156BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
4157           u64, flags, void *, meta, u64, meta_size)
4158{
4159        u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4160
4161        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4162                return -EINVAL;
4163        if (unlikely(!skb || skb_size > skb->len))
4164                return -EFAULT;
4165
4166        return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
4167                                bpf_skb_copy);
4168}
4169
4170static const struct bpf_func_proto bpf_skb_event_output_proto = {
4171        .func           = bpf_skb_event_output,
4172        .gpl_only       = true,
4173        .ret_type       = RET_INTEGER,
4174        .arg1_type      = ARG_PTR_TO_CTX,
4175        .arg2_type      = ARG_CONST_MAP_PTR,
4176        .arg3_type      = ARG_ANYTHING,
4177        .arg4_type      = ARG_PTR_TO_MEM,
4178        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
4179};
4180
4181BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
4182
4183const struct bpf_func_proto bpf_skb_output_proto = {
4184        .func           = bpf_skb_event_output,
4185        .gpl_only       = true,
4186        .ret_type       = RET_INTEGER,
4187        .arg1_type      = ARG_PTR_TO_BTF_ID,
4188        .arg1_btf_id    = &bpf_skb_output_btf_ids[0],
4189        .arg2_type      = ARG_CONST_MAP_PTR,
4190        .arg3_type      = ARG_ANYTHING,
4191        .arg4_type      = ARG_PTR_TO_MEM,
4192        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
4193};
4194
4195static unsigned short bpf_tunnel_key_af(u64 flags)
4196{
4197        return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
4198}
4199
4200BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
4201           u32, size, u64, flags)
4202{
4203        const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4204        u8 compat[sizeof(struct bpf_tunnel_key)];
4205        void *to_orig = to;
4206        int err;
4207
4208        if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
4209                err = -EINVAL;
4210                goto err_clear;
4211        }
4212        if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
4213                err = -EPROTO;
4214                goto err_clear;
4215        }
4216        if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4217                err = -EINVAL;
4218                switch (size) {
4219                case offsetof(struct bpf_tunnel_key, tunnel_label):
4220                case offsetof(struct bpf_tunnel_key, tunnel_ext):
4221                        goto set_compat;
4222                case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4223                        /* Fixup deprecated structure layouts here, so we have
4224                         * a common path later on.
4225                         */
4226                        if (ip_tunnel_info_af(info) != AF_INET)
4227                                goto err_clear;
4228set_compat:
4229                        to = (struct bpf_tunnel_key *)compat;
4230                        break;
4231                default:
4232                        goto err_clear;
4233                }
4234        }
4235
4236        to->tunnel_id = be64_to_cpu(info->key.tun_id);
4237        to->tunnel_tos = info->key.tos;
4238        to->tunnel_ttl = info->key.ttl;
4239        to->tunnel_ext = 0;
4240
4241        if (flags & BPF_F_TUNINFO_IPV6) {
4242                memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
4243                       sizeof(to->remote_ipv6));
4244                to->tunnel_label = be32_to_cpu(info->key.label);
4245        } else {
4246                to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
4247                memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
4248                to->tunnel_label = 0;
4249        }
4250
4251        if (unlikely(size != sizeof(struct bpf_tunnel_key)))
4252                memcpy(to_orig, to, size);
4253
4254        return 0;
4255err_clear:
4256        memset(to_orig, 0, size);
4257        return err;
4258}
4259
4260static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
4261        .func           = bpf_skb_get_tunnel_key,
4262        .gpl_only       = false,
4263        .ret_type       = RET_INTEGER,
4264        .arg1_type      = ARG_PTR_TO_CTX,
4265        .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
4266        .arg3_type      = ARG_CONST_SIZE,
4267        .arg4_type      = ARG_ANYTHING,
4268};
4269
4270BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
4271{
4272        const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4273        int err;
4274
4275        if (unlikely(!info ||
4276                     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
4277                err = -ENOENT;
4278                goto err_clear;
4279        }
4280        if (unlikely(size < info->options_len)) {
4281                err = -ENOMEM;
4282                goto err_clear;
4283        }
4284
4285        ip_tunnel_info_opts_get(to, info);
4286        if (size > info->options_len)
4287                memset(to + info->options_len, 0, size - info->options_len);
4288
4289        return info->options_len;
4290err_clear:
4291        memset(to, 0, size);
4292        return err;
4293}
4294
4295static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
4296        .func           = bpf_skb_get_tunnel_opt,
4297        .gpl_only       = false,
4298        .ret_type       = RET_INTEGER,
4299        .arg1_type      = ARG_PTR_TO_CTX,
4300        .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
4301        .arg3_type      = ARG_CONST_SIZE,
4302};
4303
4304static struct metadata_dst __percpu *md_dst;
4305
4306BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
4307           const struct bpf_tunnel_key *, from, u32, size, u64, flags)
4308{
4309        struct metadata_dst *md = this_cpu_ptr(md_dst);
4310        u8 compat[sizeof(struct bpf_tunnel_key)];
4311        struct ip_tunnel_info *info;
4312
4313        if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
4314                               BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
4315                return -EINVAL;
4316        if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4317                switch (size) {
4318                case offsetof(struct bpf_tunnel_key, tunnel_label):
4319                case offsetof(struct bpf_tunnel_key, tunnel_ext):
4320                case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4321                        /* Fixup deprecated structure layouts here, so we have
4322                         * a common path later on.
4323                         */
4324                        memcpy(compat, from, size);
4325                        memset(compat + size, 0, sizeof(compat) - size);
4326                        from = (const struct bpf_tunnel_key *) compat;
4327                        break;
4328                default:
4329                        return -EINVAL;
4330                }
4331        }
4332        if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
4333                     from->tunnel_ext))
4334                return -EINVAL;
4335
4336        skb_dst_drop(skb);
4337        dst_hold((struct dst_entry *) md);
4338        skb_dst_set(skb, (struct dst_entry *) md);
4339
4340        info = &md->u.tun_info;
4341        memset(info, 0, sizeof(*info));
4342        info->mode = IP_TUNNEL_INFO_TX;
4343
4344        info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
4345        if (flags & BPF_F_DONT_FRAGMENT)
4346                info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
4347        if (flags & BPF_F_ZERO_CSUM_TX)
4348                info->key.tun_flags &= ~TUNNEL_CSUM;
4349        if (flags & BPF_F_SEQ_NUMBER)
4350                info->key.tun_flags |= TUNNEL_SEQ;
4351
4352        info->key.tun_id = cpu_to_be64(from->tunnel_id);
4353        info->key.tos = from->tunnel_tos;
4354        info->key.ttl = from->tunnel_ttl;
4355
4356        if (flags & BPF_F_TUNINFO_IPV6) {
4357                info->mode |= IP_TUNNEL_INFO_IPV6;
4358                memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
4359                       sizeof(from->remote_ipv6));
4360                info->key.label = cpu_to_be32(from->tunnel_label) &
4361                                  IPV6_FLOWLABEL_MASK;
4362        } else {
4363                info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
4364        }
4365
4366        return 0;
4367}
4368
4369static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
4370        .func           = bpf_skb_set_tunnel_key,
4371        .gpl_only       = false,
4372        .ret_type       = RET_INTEGER,
4373        .arg1_type      = ARG_PTR_TO_CTX,
4374        .arg2_type      = ARG_PTR_TO_MEM,
4375        .arg3_type      = ARG_CONST_SIZE,
4376        .arg4_type      = ARG_ANYTHING,
4377};
4378
4379BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4380           const u8 *, from, u32, size)
4381{
4382        struct ip_tunnel_info *info = skb_tunnel_info(skb);
4383        const struct metadata_dst *md = this_cpu_ptr(md_dst);
4384
4385        if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
4386                return -EINVAL;
4387        if (unlikely(size > IP_TUNNEL_OPTS_MAX))
4388                return -ENOMEM;
4389
4390        ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
4391
4392        return 0;
4393}
4394
4395static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4396        .func           = bpf_skb_set_tunnel_opt,
4397        .gpl_only       = false,
4398        .ret_type       = RET_INTEGER,
4399        .arg1_type      = ARG_PTR_TO_CTX,
4400        .arg2_type      = ARG_PTR_TO_MEM,
4401        .arg3_type      = ARG_CONST_SIZE,
4402};
4403
4404static const struct bpf_func_proto *
4405bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
4406{
4407        if (!md_dst) {
4408                struct metadata_dst __percpu *tmp;
4409
4410                tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4411                                                METADATA_IP_TUNNEL,
4412                                                GFP_KERNEL);
4413                if (!tmp)
4414                        return NULL;
4415                if (cmpxchg(&md_dst, NULL, tmp))
4416                        metadata_dst_free_percpu(tmp);
4417        }
4418
4419        switch (which) {
4420        case BPF_FUNC_skb_set_tunnel_key:
4421                return &bpf_skb_set_tunnel_key_proto;
4422        case BPF_FUNC_skb_set_tunnel_opt:
4423                return &bpf_skb_set_tunnel_opt_proto;
4424        default:
4425                return NULL;
4426        }
4427}
4428
4429BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
4430           u32, idx)
4431{
4432        struct bpf_array *array = container_of(map, struct bpf_array, map);
4433        struct cgroup *cgrp;
4434        struct sock *sk;
4435
4436        sk = skb_to_full_sk(skb);
4437        if (!sk || !sk_fullsock(sk))
4438                return -ENOENT;
4439        if (unlikely(idx >= array->map.max_entries))
4440                return -E2BIG;
4441
4442        cgrp = READ_ONCE(array->ptrs[idx]);
4443        if (unlikely(!cgrp))
4444                return -EAGAIN;
4445
4446        return sk_under_cgroup_hierarchy(sk, cgrp);
4447}
4448
4449static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4450        .func           = bpf_skb_under_cgroup,
4451        .gpl_only       = false,
4452        .ret_type       = RET_INTEGER,
4453        .arg1_type      = ARG_PTR_TO_CTX,
4454        .arg2_type      = ARG_CONST_MAP_PTR,
4455        .arg3_type      = ARG_ANYTHING,
4456};
4457
4458#ifdef CONFIG_SOCK_CGROUP_DATA
4459static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4460{
4461        struct cgroup *cgrp;
4462
4463        sk = sk_to_full_sk(sk);
4464        if (!sk || !sk_fullsock(sk))
4465                return 0;
4466
4467        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4468        return cgroup_id(cgrp);
4469}
4470
4471BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4472{
4473        return __bpf_sk_cgroup_id(skb->sk);
4474}
4475
4476static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4477        .func           = bpf_skb_cgroup_id,
4478        .gpl_only       = false,
4479        .ret_type       = RET_INTEGER,
4480        .arg1_type      = ARG_PTR_TO_CTX,
4481};
4482
4483static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4484                                              int ancestor_level)
4485{
4486        struct cgroup *ancestor;
4487        struct cgroup *cgrp;
4488
4489        sk = sk_to_full_sk(sk);
4490        if (!sk || !sk_fullsock(sk))
4491                return 0;
4492
4493        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4494        ancestor = cgroup_ancestor(cgrp, ancestor_level);
4495        if (!ancestor)
4496                return 0;
4497
4498        return cgroup_id(ancestor);
4499}
4500
4501BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4502           ancestor_level)
4503{
4504        return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
4505}
4506
4507static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4508        .func           = bpf_skb_ancestor_cgroup_id,
4509        .gpl_only       = false,
4510        .ret_type       = RET_INTEGER,
4511        .arg1_type      = ARG_PTR_TO_CTX,
4512        .arg2_type      = ARG_ANYTHING,
4513};
4514
4515BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4516{
4517        return __bpf_sk_cgroup_id(sk);
4518}
4519
4520static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4521        .func           = bpf_sk_cgroup_id,
4522        .gpl_only       = false,
4523        .ret_type       = RET_INTEGER,
4524        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4525};
4526
4527BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
4528{
4529        return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4530}
4531
4532static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4533        .func           = bpf_sk_ancestor_cgroup_id,
4534        .gpl_only       = false,
4535        .ret_type       = RET_INTEGER,
4536        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4537        .arg2_type      = ARG_ANYTHING,
4538};
4539#endif
4540
4541static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
4542                                  unsigned long off, unsigned long len)
4543{
4544        memcpy(dst_buff, src_buff + off, len);
4545        return 0;
4546}
4547
4548BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
4549           u64, flags, void *, meta, u64, meta_size)
4550{
4551        u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4552
4553        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4554                return -EINVAL;
4555        if (unlikely(!xdp ||
4556                     xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
4557                return -EFAULT;
4558
4559        return bpf_event_output(map, flags, meta, meta_size, xdp->data,
4560                                xdp_size, bpf_xdp_copy);
4561}
4562
4563static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4564        .func           = bpf_xdp_event_output,
4565        .gpl_only       = true,
4566        .ret_type       = RET_INTEGER,
4567        .arg1_type      = ARG_PTR_TO_CTX,
4568        .arg2_type      = ARG_CONST_MAP_PTR,
4569        .arg3_type      = ARG_ANYTHING,
4570        .arg4_type      = ARG_PTR_TO_MEM,
4571        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
4572};
4573
4574BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
4575
4576const struct bpf_func_proto bpf_xdp_output_proto = {
4577        .func           = bpf_xdp_event_output,
4578        .gpl_only       = true,
4579        .ret_type       = RET_INTEGER,
4580        .arg1_type      = ARG_PTR_TO_BTF_ID,
4581        .arg1_btf_id    = &bpf_xdp_output_btf_ids[0],
4582        .arg2_type      = ARG_CONST_MAP_PTR,
4583        .arg3_type      = ARG_ANYTHING,
4584        .arg4_type      = ARG_PTR_TO_MEM,
4585        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
4586};
4587
4588BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4589{
4590        return skb->sk ? __sock_gen_cookie(skb->sk) : 0;
4591}
4592
4593static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4594        .func           = bpf_get_socket_cookie,
4595        .gpl_only       = false,
4596        .ret_type       = RET_INTEGER,
4597        .arg1_type      = ARG_PTR_TO_CTX,
4598};
4599
4600BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4601{
4602        return __sock_gen_cookie(ctx->sk);
4603}
4604
4605static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4606        .func           = bpf_get_socket_cookie_sock_addr,
4607        .gpl_only       = false,
4608        .ret_type       = RET_INTEGER,
4609        .arg1_type      = ARG_PTR_TO_CTX,
4610};
4611
4612BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4613{
4614        return __sock_gen_cookie(ctx);
4615}
4616
4617static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4618        .func           = bpf_get_socket_cookie_sock,
4619        .gpl_only       = false,
4620        .ret_type       = RET_INTEGER,
4621        .arg1_type      = ARG_PTR_TO_CTX,
4622};
4623
4624BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk)
4625{
4626        return sk ? sock_gen_cookie(sk) : 0;
4627}
4628
4629const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
4630        .func           = bpf_get_socket_ptr_cookie,
4631        .gpl_only       = false,
4632        .ret_type       = RET_INTEGER,
4633        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4634};
4635
4636BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4637{
4638        return __sock_gen_cookie(ctx->sk);
4639}
4640
4641static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
4642        .func           = bpf_get_socket_cookie_sock_ops,
4643        .gpl_only       = false,
4644        .ret_type       = RET_INTEGER,
4645        .arg1_type      = ARG_PTR_TO_CTX,
4646};
4647
4648static u64 __bpf_get_netns_cookie(struct sock *sk)
4649{
4650        const struct net *net = sk ? sock_net(sk) : &init_net;
4651
4652        return net->net_cookie;
4653}
4654
4655BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
4656{
4657        return __bpf_get_netns_cookie(ctx);
4658}
4659
4660static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
4661        .func           = bpf_get_netns_cookie_sock,
4662        .gpl_only       = false,
4663        .ret_type       = RET_INTEGER,
4664        .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
4665};
4666
4667BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4668{
4669        return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4670}
4671
4672static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
4673        .func           = bpf_get_netns_cookie_sock_addr,
4674        .gpl_only       = false,
4675        .ret_type       = RET_INTEGER,
4676        .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
4677};
4678
4679BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4680{
4681        return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4682}
4683
4684static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
4685        .func           = bpf_get_netns_cookie_sock_ops,
4686        .gpl_only       = false,
4687        .ret_type       = RET_INTEGER,
4688        .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
4689};
4690
4691BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
4692{
4693        return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4694}
4695
4696static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
4697        .func           = bpf_get_netns_cookie_sk_msg,
4698        .gpl_only       = false,
4699        .ret_type       = RET_INTEGER,
4700        .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
4701};
4702
4703BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
4704{
4705        struct sock *sk = sk_to_full_sk(skb->sk);
4706        kuid_t kuid;
4707
4708        if (!sk || !sk_fullsock(sk))
4709                return overflowuid;
4710        kuid = sock_net_uid(sock_net(sk), sk);
4711        return from_kuid_munged(sock_net(sk)->user_ns, kuid);
4712}
4713
4714static const struct bpf_func_proto bpf_get_socket_uid_proto = {
4715        .func           = bpf_get_socket_uid,
4716        .gpl_only       = false,
4717        .ret_type       = RET_INTEGER,
4718        .arg1_type      = ARG_PTR_TO_CTX,
4719};
4720
4721static int _bpf_setsockopt(struct sock *sk, int level, int optname,
4722                           char *optval, int optlen)
4723{
4724        char devname[IFNAMSIZ];
4725        int val, valbool;
4726        struct net *net;
4727        int ifindex;
4728        int ret = 0;
4729
4730        if (!sk_fullsock(sk))
4731                return -EINVAL;
4732
4733        sock_owned_by_me(sk);
4734
4735        if (level == SOL_SOCKET) {
4736                if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
4737                        return -EINVAL;
4738                val = *((int *)optval);
4739                valbool = val ? 1 : 0;
4740
4741                /* Only some socketops are supported */
4742                switch (optname) {
4743                case SO_RCVBUF:
4744                        val = min_t(u32, val, sysctl_rmem_max);
4745                        sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
4746                        WRITE_ONCE(sk->sk_rcvbuf,
4747                                   max_t(int, val * 2, SOCK_MIN_RCVBUF));
4748                        break;
4749                case SO_SNDBUF:
4750                        val = min_t(u32, val, sysctl_wmem_max);
4751                        sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
4752                        WRITE_ONCE(sk->sk_sndbuf,
4753                                   max_t(int, val * 2, SOCK_MIN_SNDBUF));
4754                        break;
4755                case SO_MAX_PACING_RATE: /* 32bit version */
4756                        if (val != ~0U)
4757                                cmpxchg(&sk->sk_pacing_status,
4758                                        SK_PACING_NONE,
4759                                        SK_PACING_NEEDED);
4760                        sk->sk_max_pacing_rate = (val == ~0U) ?
4761                                                 ~0UL : (unsigned int)val;
4762                        sk->sk_pacing_rate = min(sk->sk_pacing_rate,
4763                                                 sk->sk_max_pacing_rate);
4764                        break;
4765                case SO_PRIORITY:
4766                        sk->sk_priority = val;
4767                        break;
4768                case SO_RCVLOWAT:
4769                        if (val < 0)
4770                                val = INT_MAX;
4771                        WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
4772                        break;
4773                case SO_MARK:
4774                        if (sk->sk_mark != val) {
4775                                sk->sk_mark = val;
4776                                sk_dst_reset(sk);
4777                        }
4778                        break;
4779                case SO_BINDTODEVICE:
4780                        optlen = min_t(long, optlen, IFNAMSIZ - 1);
4781                        strncpy(devname, optval, optlen);
4782                        devname[optlen] = 0;
4783
4784                        ifindex = 0;
4785                        if (devname[0] != '\0') {
4786                                struct net_device *dev;
4787
4788                                ret = -ENODEV;
4789
4790                                net = sock_net(sk);
4791                                dev = dev_get_by_name(net, devname);
4792                                if (!dev)
4793                                        break;
4794                                ifindex = dev->ifindex;
4795                                dev_put(dev);
4796                        }
4797                        fallthrough;
4798                case SO_BINDTOIFINDEX:
4799                        if (optname == SO_BINDTOIFINDEX)
4800                                ifindex = val;
4801                        ret = sock_bindtoindex(sk, ifindex, false);
4802                        break;
4803                case SO_KEEPALIVE:
4804                        if (sk->sk_prot->keepalive)
4805                                sk->sk_prot->keepalive(sk, valbool);
4806                        sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
4807                        break;
4808                case SO_REUSEPORT:
4809                        sk->sk_reuseport = valbool;
4810                        break;
4811                default:
4812                        ret = -EINVAL;
4813                }
4814#ifdef CONFIG_INET
4815        } else if (level == SOL_IP) {
4816                if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4817                        return -EINVAL;
4818
4819                val = *((int *)optval);
4820                /* Only some options are supported */
4821                switch (optname) {
4822                case IP_TOS:
4823                        if (val < -1 || val > 0xff) {
4824                                ret = -EINVAL;
4825                        } else {
4826                                struct inet_sock *inet = inet_sk(sk);
4827
4828                                if (val == -1)
4829                                        val = 0;
4830                                inet->tos = val;
4831                        }
4832                        break;
4833                default:
4834                        ret = -EINVAL;
4835                }
4836#if IS_ENABLED(CONFIG_IPV6)
4837        } else if (level == SOL_IPV6) {
4838                if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4839                        return -EINVAL;
4840
4841                val = *((int *)optval);
4842                /* Only some options are supported */
4843                switch (optname) {
4844                case IPV6_TCLASS:
4845                        if (val < -1 || val > 0xff) {
4846                                ret = -EINVAL;
4847                        } else {
4848                                struct ipv6_pinfo *np = inet6_sk(sk);
4849
4850                                if (val == -1)
4851                                        val = 0;
4852                                np->tclass = val;
4853                        }
4854                        break;
4855                default:
4856                        ret = -EINVAL;
4857                }
4858#endif
4859        } else if (level == SOL_TCP &&
4860                   sk->sk_prot->setsockopt == tcp_setsockopt) {
4861                if (optname == TCP_CONGESTION) {
4862                        char name[TCP_CA_NAME_MAX];
4863
4864                        strncpy(name, optval, min_t(long, optlen,
4865                                                    TCP_CA_NAME_MAX-1));
4866                        name[TCP_CA_NAME_MAX-1] = 0;
4867                        ret = tcp_set_congestion_control(sk, name, false, true);
4868                } else {
4869                        struct inet_connection_sock *icsk = inet_csk(sk);
4870                        struct tcp_sock *tp = tcp_sk(sk);
4871                        unsigned long timeout;
4872
4873                        if (optlen != sizeof(int))
4874                                return -EINVAL;
4875
4876                        val = *((int *)optval);
4877                        /* Only some options are supported */
4878                        switch (optname) {
4879                        case TCP_BPF_IW:
4880                                if (val <= 0 || tp->data_segs_out > tp->syn_data)
4881                                        ret = -EINVAL;
4882                                else
4883                                        tp->snd_cwnd = val;
4884                                break;
4885                        case TCP_BPF_SNDCWND_CLAMP:
4886                                if (val <= 0) {
4887                                        ret = -EINVAL;
4888                                } else {
4889                                        tp->snd_cwnd_clamp = val;
4890                                        tp->snd_ssthresh = val;
4891                                }
4892                                break;
4893                        case TCP_BPF_DELACK_MAX:
4894                                timeout = usecs_to_jiffies(val);
4895                                if (timeout > TCP_DELACK_MAX ||
4896                                    timeout < TCP_TIMEOUT_MIN)
4897                                        return -EINVAL;
4898                                inet_csk(sk)->icsk_delack_max = timeout;
4899                                break;
4900                        case TCP_BPF_RTO_MIN:
4901                                timeout = usecs_to_jiffies(val);
4902                                if (timeout > TCP_RTO_MIN ||
4903                                    timeout < TCP_TIMEOUT_MIN)
4904                                        return -EINVAL;
4905                                inet_csk(sk)->icsk_rto_min = timeout;
4906                                break;
4907                        case TCP_SAVE_SYN:
4908                                if (val < 0 || val > 1)
4909                                        ret = -EINVAL;
4910                                else
4911                                        tp->save_syn = val;
4912                                break;
4913                        case TCP_KEEPIDLE:
4914                                ret = tcp_sock_set_keepidle_locked(sk, val);
4915                                break;
4916                        case TCP_KEEPINTVL:
4917                                if (val < 1 || val > MAX_TCP_KEEPINTVL)
4918                                        ret = -EINVAL;
4919                                else
4920                                        tp->keepalive_intvl = val * HZ;
4921                                break;
4922                        case TCP_KEEPCNT:
4923                                if (val < 1 || val > MAX_TCP_KEEPCNT)
4924                                        ret = -EINVAL;
4925                                else
4926                                        tp->keepalive_probes = val;
4927                                break;
4928                        case TCP_SYNCNT:
4929                                if (val < 1 || val > MAX_TCP_SYNCNT)
4930                                        ret = -EINVAL;
4931                                else
4932                                        icsk->icsk_syn_retries = val;
4933                                break;
4934                        case TCP_USER_TIMEOUT:
4935                                if (val < 0)
4936                                        ret = -EINVAL;
4937                                else
4938                                        icsk->icsk_user_timeout = val;
4939                                break;
4940                        case TCP_NOTSENT_LOWAT:
4941                                tp->notsent_lowat = val;
4942                                sk->sk_write_space(sk);
4943                                break;
4944                        case TCP_WINDOW_CLAMP:
4945                                ret = tcp_set_window_clamp(sk, val);
4946                                break;
4947                        default:
4948                                ret = -EINVAL;
4949                        }
4950                }
4951#endif
4952        } else {
4953                ret = -EINVAL;
4954        }
4955        return ret;
4956}
4957
4958static int _bpf_getsockopt(struct sock *sk, int level, int optname,
4959                           char *optval, int optlen)
4960{
4961        if (!sk_fullsock(sk))
4962                goto err_clear;
4963
4964        sock_owned_by_me(sk);
4965
4966        if (level == SOL_SOCKET) {
4967                if (optlen != sizeof(int))
4968                        goto err_clear;
4969
4970                switch (optname) {
4971                case SO_MARK:
4972                        *((int *)optval) = sk->sk_mark;
4973                        break;
4974                case SO_PRIORITY:
4975                        *((int *)optval) = sk->sk_priority;
4976                        break;
4977                case SO_BINDTOIFINDEX:
4978                        *((int *)optval) = sk->sk_bound_dev_if;
4979                        break;
4980                case SO_REUSEPORT:
4981                        *((int *)optval) = sk->sk_reuseport;
4982                        break;
4983                default:
4984                        goto err_clear;
4985                }
4986#ifdef CONFIG_INET
4987        } else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
4988                struct inet_connection_sock *icsk;
4989                struct tcp_sock *tp;
4990
4991                switch (optname) {
4992                case TCP_CONGESTION:
4993                        icsk = inet_csk(sk);
4994
4995                        if (!icsk->icsk_ca_ops || optlen <= 1)
4996                                goto err_clear;
4997                        strncpy(optval, icsk->icsk_ca_ops->name, optlen);
4998                        optval[optlen - 1] = 0;
4999                        break;
5000                case TCP_SAVED_SYN:
5001                        tp = tcp_sk(sk);
5002
5003                        if (optlen <= 0 || !tp->saved_syn ||
5004                            optlen > tcp_saved_syn_len(tp->saved_syn))
5005                                goto err_clear;
5006                        memcpy(optval, tp->saved_syn->data, optlen);
5007                        break;
5008                default:
5009                        goto err_clear;
5010                }
5011        } else if (level == SOL_IP) {
5012                struct inet_sock *inet = inet_sk(sk);
5013
5014                if (optlen != sizeof(int) || sk->sk_family != AF_INET)
5015                        goto err_clear;
5016
5017                /* Only some options are supported */
5018                switch (optname) {
5019                case IP_TOS:
5020                        *((int *)optval) = (int)inet->tos;
5021                        break;
5022                default:
5023                        goto err_clear;
5024                }
5025#if IS_ENABLED(CONFIG_IPV6)
5026        } else if (level == SOL_IPV6) {
5027                struct ipv6_pinfo *np = inet6_sk(sk);
5028
5029                if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
5030                        goto err_clear;
5031
5032                /* Only some options are supported */
5033                switch (optname) {
5034                case IPV6_TCLASS:
5035                        *((int *)optval) = (int)np->tclass;
5036                        break;
5037                default:
5038                        goto err_clear;
5039                }
5040#endif
5041#endif
5042        } else {
5043                goto err_clear;
5044        }
5045        return 0;
5046err_clear:
5047        memset(optval, 0, optlen);
5048        return -EINVAL;
5049}
5050
5051BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
5052           int, optname, char *, optval, int, optlen)
5053{
5054        if (level == SOL_TCP && optname == TCP_CONGESTION) {
5055                if (optlen >= sizeof("cdg") - 1 &&
5056                    !strncmp("cdg", optval, optlen))
5057                        return -ENOTSUPP;
5058        }
5059
5060        return _bpf_setsockopt(sk, level, optname, optval, optlen);
5061}
5062
5063const struct bpf_func_proto bpf_sk_setsockopt_proto = {
5064        .func           = bpf_sk_setsockopt,
5065        .gpl_only       = false,
5066        .ret_type       = RET_INTEGER,
5067        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5068        .arg2_type      = ARG_ANYTHING,
5069        .arg3_type      = ARG_ANYTHING,
5070        .arg4_type      = ARG_PTR_TO_MEM,
5071        .arg5_type      = ARG_CONST_SIZE,
5072};
5073
5074BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
5075           int, optname, char *, optval, int, optlen)
5076{
5077        return _bpf_getsockopt(sk, level, optname, optval, optlen);
5078}
5079
5080const struct bpf_func_proto bpf_sk_getsockopt_proto = {
5081        .func           = bpf_sk_getsockopt,
5082        .gpl_only       = false,
5083        .ret_type       = RET_INTEGER,
5084        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5085        .arg2_type      = ARG_ANYTHING,
5086        .arg3_type      = ARG_ANYTHING,
5087        .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
5088        .arg5_type      = ARG_CONST_SIZE,
5089};
5090
5091BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
5092           int, level, int, optname, char *, optval, int, optlen)
5093{
5094        return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
5095}
5096
5097static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
5098        .func           = bpf_sock_addr_setsockopt,
5099        .gpl_only       = false,
5100        .ret_type       = RET_INTEGER,
5101        .arg1_type      = ARG_PTR_TO_CTX,
5102        .arg2_type      = ARG_ANYTHING,
5103        .arg3_type      = ARG_ANYTHING,
5104        .arg4_type      = ARG_PTR_TO_MEM,
5105        .arg5_type      = ARG_CONST_SIZE,
5106};
5107
5108BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
5109           int, level, int, optname, char *, optval, int, optlen)
5110{
5111        return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
5112}
5113
5114static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
5115        .func           = bpf_sock_addr_getsockopt,
5116        .gpl_only       = false,
5117        .ret_type       = RET_INTEGER,
5118        .arg1_type      = ARG_PTR_TO_CTX,
5119        .arg2_type      = ARG_ANYTHING,
5120        .arg3_type      = ARG_ANYTHING,
5121        .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
5122        .arg5_type      = ARG_CONST_SIZE,
5123};
5124
5125BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5126           int, level, int, optname, char *, optval, int, optlen)
5127{
5128        return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
5129}
5130
5131static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
5132        .func           = bpf_sock_ops_setsockopt,
5133        .gpl_only       = false,
5134        .ret_type       = RET_INTEGER,
5135        .arg1_type      = ARG_PTR_TO_CTX,
5136        .arg2_type      = ARG_ANYTHING,
5137        .arg3_type      = ARG_ANYTHING,
5138        .arg4_type      = ARG_PTR_TO_MEM,
5139        .arg5_type      = ARG_CONST_SIZE,
5140};
5141
5142static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
5143                                int optname, const u8 **start)
5144{
5145        struct sk_buff *syn_skb = bpf_sock->syn_skb;
5146        const u8 *hdr_start;
5147        int ret;
5148
5149        if (syn_skb) {
5150                /* sk is a request_sock here */
5151
5152                if (optname == TCP_BPF_SYN) {
5153                        hdr_start = syn_skb->data;
5154                        ret = tcp_hdrlen(syn_skb);
5155                } else if (optname == TCP_BPF_SYN_IP) {
5156                        hdr_start = skb_network_header(syn_skb);
5157                        ret = skb_network_header_len(syn_skb) +
5158                                tcp_hdrlen(syn_skb);
5159                } else {
5160                        /* optname == TCP_BPF_SYN_MAC */
5161                        hdr_start = skb_mac_header(syn_skb);
5162                        ret = skb_mac_header_len(syn_skb) +
5163                                skb_network_header_len(syn_skb) +
5164                                tcp_hdrlen(syn_skb);
5165                }
5166        } else {
5167                struct sock *sk = bpf_sock->sk;
5168                struct saved_syn *saved_syn;
5169
5170                if (sk->sk_state == TCP_NEW_SYN_RECV)
5171                        /* synack retransmit. bpf_sock->syn_skb will
5172                         * not be available.  It has to resort to
5173                         * saved_syn (if it is saved).
5174                         */
5175                        saved_syn = inet_reqsk(sk)->saved_syn;
5176                else
5177                        saved_syn = tcp_sk(sk)->saved_syn;
5178
5179                if (!saved_syn)
5180                        return -ENOENT;
5181
5182                if (optname == TCP_BPF_SYN) {
5183                        hdr_start = saved_syn->data +
5184                                saved_syn->mac_hdrlen +
5185                                saved_syn->network_hdrlen;
5186                        ret = saved_syn->tcp_hdrlen;
5187                } else if (optname == TCP_BPF_SYN_IP) {
5188                        hdr_start = saved_syn->data +
5189                                saved_syn->mac_hdrlen;
5190                        ret = saved_syn->network_hdrlen +
5191                                saved_syn->tcp_hdrlen;
5192                } else {
5193                        /* optname == TCP_BPF_SYN_MAC */
5194
5195                        /* TCP_SAVE_SYN may not have saved the mac hdr */
5196                        if (!saved_syn->mac_hdrlen)
5197                                return -ENOENT;
5198
5199                        hdr_start = saved_syn->data;
5200                        ret = saved_syn->mac_hdrlen +
5201                                saved_syn->network_hdrlen +
5202                                saved_syn->tcp_hdrlen;
5203                }
5204        }
5205
5206        *start = hdr_start;
5207        return ret;
5208}
5209
5210BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5211           int, level, int, optname, char *, optval, int, optlen)
5212{
5213        if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
5214            optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
5215                int ret, copy_len = 0;
5216                const u8 *start;
5217
5218                ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
5219                if (ret > 0) {
5220                        copy_len = ret;
5221                        if (optlen < copy_len) {
5222                                copy_len = optlen;
5223                                ret = -ENOSPC;
5224                        }
5225
5226                        memcpy(optval, start, copy_len);
5227                }
5228
5229                /* Zero out unused buffer at the end */
5230                memset(optval + copy_len, 0, optlen - copy_len);
5231
5232                return ret;
5233        }
5234
5235        return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
5236}
5237
5238static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
5239        .func           = bpf_sock_ops_getsockopt,
5240        .gpl_only       = false,
5241        .ret_type       = RET_INTEGER,
5242        .arg1_type      = ARG_PTR_TO_CTX,
5243        .arg2_type      = ARG_ANYTHING,
5244        .arg3_type      = ARG_ANYTHING,
5245        .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
5246        .arg5_type      = ARG_CONST_SIZE,
5247};
5248
5249BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
5250           int, argval)
5251{
5252        struct sock *sk = bpf_sock->sk;
5253        int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
5254
5255        if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
5256                return -EINVAL;
5257
5258        tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
5259
5260        return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
5261}
5262
5263static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
5264        .func           = bpf_sock_ops_cb_flags_set,
5265        .gpl_only       = false,
5266        .ret_type       = RET_INTEGER,
5267        .arg1_type      = ARG_PTR_TO_CTX,
5268        .arg2_type      = ARG_ANYTHING,
5269};
5270
5271const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
5272EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
5273
5274BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
5275           int, addr_len)
5276{
5277#ifdef CONFIG_INET
5278        struct sock *sk = ctx->sk;
5279        u32 flags = BIND_FROM_BPF;
5280        int err;
5281
5282        err = -EINVAL;
5283        if (addr_len < offsetofend(struct sockaddr, sa_family))
5284                return err;
5285        if (addr->sa_family == AF_INET) {
5286                if (addr_len < sizeof(struct sockaddr_in))
5287                        return err;
5288                if (((struct sockaddr_in *)addr)->sin_port == htons(0))
5289                        flags |= BIND_FORCE_ADDRESS_NO_PORT;
5290                return __inet_bind(sk, addr, addr_len, flags);
5291#if IS_ENABLED(CONFIG_IPV6)
5292        } else if (addr->sa_family == AF_INET6) {
5293                if (addr_len < SIN6_LEN_RFC2133)
5294                        return err;
5295                if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
5296                        flags |= BIND_FORCE_ADDRESS_NO_PORT;
5297                /* ipv6_bpf_stub cannot be NULL, since it's called from
5298                 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
5299                 */
5300                return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
5301#endif /* CONFIG_IPV6 */
5302        }
5303#endif /* CONFIG_INET */
5304
5305        return -EAFNOSUPPORT;
5306}
5307
5308static const struct bpf_func_proto bpf_bind_proto = {
5309        .func           = bpf_bind,
5310        .gpl_only       = false,
5311        .ret_type       = RET_INTEGER,
5312        .arg1_type      = ARG_PTR_TO_CTX,
5313        .arg2_type      = ARG_PTR_TO_MEM,
5314        .arg3_type      = ARG_CONST_SIZE,
5315};
5316
5317#ifdef CONFIG_XFRM
5318BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
5319           struct bpf_xfrm_state *, to, u32, size, u64, flags)
5320{
5321        const struct sec_path *sp = skb_sec_path(skb);
5322        const struct xfrm_state *x;
5323
5324        if (!sp || unlikely(index >= sp->len || flags))
5325                goto err_clear;
5326
5327        x = sp->xvec[index];
5328
5329        if (unlikely(size != sizeof(struct bpf_xfrm_state)))
5330                goto err_clear;
5331
5332        to->reqid = x->props.reqid;
5333        to->spi = x->id.spi;
5334        to->family = x->props.family;
5335        to->ext = 0;
5336
5337        if (to->family == AF_INET6) {
5338                memcpy(to->remote_ipv6, x->props.saddr.a6,
5339                       sizeof(to->remote_ipv6));
5340        } else {
5341                to->remote_ipv4 = x->props.saddr.a4;
5342                memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
5343        }
5344
5345        return 0;
5346err_clear:
5347        memset(to, 0, size);
5348        return -EINVAL;
5349}
5350
5351static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
5352        .func           = bpf_skb_get_xfrm_state,
5353        .gpl_only       = false,
5354        .ret_type       = RET_INTEGER,
5355        .arg1_type      = ARG_PTR_TO_CTX,
5356        .arg2_type      = ARG_ANYTHING,
5357        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
5358        .arg4_type      = ARG_CONST_SIZE,
5359        .arg5_type      = ARG_ANYTHING,
5360};
5361#endif
5362
5363#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
5364static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
5365                                  const struct neighbour *neigh,
5366                                  const struct net_device *dev, u32 mtu)
5367{
5368        memcpy(params->dmac, neigh->ha, ETH_ALEN);
5369        memcpy(params->smac, dev->dev_addr, ETH_ALEN);
5370        params->h_vlan_TCI = 0;
5371        params->h_vlan_proto = 0;
5372        if (mtu)
5373                params->mtu_result = mtu; /* union with tot_len */
5374
5375        return 0;
5376}
5377#endif
5378
5379#if IS_ENABLED(CONFIG_INET)
5380static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
5381                               u32 flags, bool check_mtu)
5382{
5383        struct fib_nh_common *nhc;
5384        struct in_device *in_dev;
5385        struct neighbour *neigh;
5386        struct net_device *dev;
5387        struct fib_result res;
5388        struct flowi4 fl4;
5389        u32 mtu = 0;
5390        int err;
5391
5392        dev = dev_get_by_index_rcu(net, params->ifindex);
5393        if (unlikely(!dev))
5394                return -ENODEV;
5395
5396        /* verify forwarding is enabled on this interface */
5397        in_dev = __in_dev_get_rcu(dev);
5398        if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
5399                return BPF_FIB_LKUP_RET_FWD_DISABLED;
5400
5401        if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5402                fl4.flowi4_iif = 1;
5403                fl4.flowi4_oif = params->ifindex;
5404        } else {
5405                fl4.flowi4_iif = params->ifindex;
5406                fl4.flowi4_oif = 0;
5407        }
5408        fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
5409        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
5410        fl4.flowi4_flags = 0;
5411
5412        fl4.flowi4_proto = params->l4_protocol;
5413        fl4.daddr = params->ipv4_dst;
5414        fl4.saddr = params->ipv4_src;
5415        fl4.fl4_sport = params->sport;
5416        fl4.fl4_dport = params->dport;
5417        fl4.flowi4_multipath_hash = 0;
5418
5419        if (flags & BPF_FIB_LOOKUP_DIRECT) {
5420                u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5421                struct fib_table *tb;
5422
5423                tb = fib_get_table(net, tbid);
5424                if (unlikely(!tb))
5425                        return BPF_FIB_LKUP_RET_NOT_FWDED;
5426
5427                err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
5428        } else {
5429                fl4.flowi4_mark = 0;
5430                fl4.flowi4_secid = 0;
5431                fl4.flowi4_tun_key.tun_id = 0;
5432                fl4.flowi4_uid = sock_net_uid(net, NULL);
5433
5434                err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
5435        }
5436
5437        if (err) {
5438                /* map fib lookup errors to RTN_ type */
5439                if (err == -EINVAL)
5440                        return BPF_FIB_LKUP_RET_BLACKHOLE;
5441                if (err == -EHOSTUNREACH)
5442                        return BPF_FIB_LKUP_RET_UNREACHABLE;
5443                if (err == -EACCES)
5444                        return BPF_FIB_LKUP_RET_PROHIBIT;
5445
5446                return BPF_FIB_LKUP_RET_NOT_FWDED;
5447        }
5448
5449        if (res.type != RTN_UNICAST)
5450                return BPF_FIB_LKUP_RET_NOT_FWDED;
5451
5452        if (fib_info_num_path(res.fi) > 1)
5453                fib_select_path(net, &res, &fl4, NULL);
5454
5455        if (check_mtu) {
5456                mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
5457                if (params->tot_len > mtu) {
5458                        params->mtu_result = mtu; /* union with tot_len */
5459                        return BPF_FIB_LKUP_RET_FRAG_NEEDED;
5460                }
5461        }
5462
5463        nhc = res.nhc;
5464
5465        /* do not handle lwt encaps right now */
5466        if (nhc->nhc_lwtstate)
5467                return BPF_FIB_LKUP_RET_UNSUPP_LWT;
5468
5469        dev = nhc->nhc_dev;
5470
5471        params->rt_metric = res.fi->fib_priority;
5472        params->ifindex = dev->ifindex;
5473
5474        /* xdp and cls_bpf programs are run in RCU-bh so
5475         * rcu_read_lock_bh is not needed here
5476         */
5477        if (likely(nhc->nhc_gw_family != AF_INET6)) {
5478                if (nhc->nhc_gw_family)
5479                        params->ipv4_dst = nhc->nhc_gw.ipv4;
5480
5481                neigh = __ipv4_neigh_lookup_noref(dev,
5482                                                 (__force u32)params->ipv4_dst);
5483        } else {
5484                struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
5485
5486                params->family = AF_INET6;
5487                *dst = nhc->nhc_gw.ipv6;
5488                neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5489        }
5490
5491        if (!neigh)
5492                return BPF_FIB_LKUP_RET_NO_NEIGH;
5493
5494        return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
5495}
5496#endif
5497
5498#if IS_ENABLED(CONFIG_IPV6)
5499static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
5500                               u32 flags, bool check_mtu)
5501{
5502        struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
5503        struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
5504        struct fib6_result res = {};
5505        struct neighbour *neigh;
5506        struct net_device *dev;
5507        struct inet6_dev *idev;
5508        struct flowi6 fl6;
5509        int strict = 0;
5510        int oif, err;
5511        u32 mtu = 0;
5512
5513        /* link local addresses are never forwarded */
5514        if (rt6_need_strict(dst) || rt6_need_strict(src))
5515                return BPF_FIB_LKUP_RET_NOT_FWDED;
5516
5517        dev = dev_get_by_index_rcu(net, params->ifindex);
5518        if (unlikely(!dev))
5519                return -ENODEV;
5520
5521        idev = __in6_dev_get_safely(dev);
5522        if (unlikely(!idev || !idev->cnf.forwarding))
5523                return BPF_FIB_LKUP_RET_FWD_DISABLED;
5524
5525        if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5526                fl6.flowi6_iif = 1;
5527                oif = fl6.flowi6_oif = params->ifindex;
5528        } else {
5529                oif = fl6.flowi6_iif = params->ifindex;
5530                fl6.flowi6_oif = 0;
5531                strict = RT6_LOOKUP_F_HAS_SADDR;
5532        }
5533        fl6.flowlabel = params->flowinfo;
5534        fl6.flowi6_scope = 0;
5535        fl6.flowi6_flags = 0;
5536        fl6.mp_hash = 0;
5537
5538        fl6.flowi6_proto = params->l4_protocol;
5539        fl6.daddr = *dst;
5540        fl6.saddr = *src;
5541        fl6.fl6_sport = params->sport;
5542        fl6.fl6_dport = params->dport;
5543
5544        if (flags & BPF_FIB_LOOKUP_DIRECT) {
5545                u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5546                struct fib6_table *tb;
5547
5548                tb = ipv6_stub->fib6_get_table(net, tbid);
5549                if (unlikely(!tb))
5550                        return BPF_FIB_LKUP_RET_NOT_FWDED;
5551
5552                err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
5553                                                   strict);
5554        } else {
5555                fl6.flowi6_mark = 0;
5556                fl6.flowi6_secid = 0;
5557                fl6.flowi6_tun_key.tun_id = 0;
5558                fl6.flowi6_uid = sock_net_uid(net, NULL);
5559
5560                err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
5561        }
5562
5563        if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
5564                     res.f6i == net->ipv6.fib6_null_entry))
5565                return BPF_FIB_LKUP_RET_NOT_FWDED;
5566
5567        switch (res.fib6_type) {
5568        /* only unicast is forwarded */
5569        case RTN_UNICAST:
5570                break;
5571        case RTN_BLACKHOLE:
5572                return BPF_FIB_LKUP_RET_BLACKHOLE;
5573        case RTN_UNREACHABLE:
5574                return BPF_FIB_LKUP_RET_UNREACHABLE;
5575        case RTN_PROHIBIT:
5576                return BPF_FIB_LKUP_RET_PROHIBIT;
5577        default:
5578                return BPF_FIB_LKUP_RET_NOT_FWDED;
5579        }
5580
5581        ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
5582                                    fl6.flowi6_oif != 0, NULL, strict);
5583
5584        if (check_mtu) {
5585                mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
5586                if (params->tot_len > mtu) {
5587                        params->mtu_result = mtu; /* union with tot_len */
5588                        return BPF_FIB_LKUP_RET_FRAG_NEEDED;
5589                }
5590        }
5591
5592        if (res.nh->fib_nh_lws)
5593                return BPF_FIB_LKUP_RET_UNSUPP_LWT;
5594
5595        if (res.nh->fib_nh_gw_family)
5596                *dst = res.nh->fib_nh_gw6;
5597
5598        dev = res.nh->fib_nh_dev;
5599        params->rt_metric = res.f6i->fib6_metric;
5600        params->ifindex = dev->ifindex;
5601
5602        /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
5603         * not needed here.
5604         */
5605        neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5606        if (!neigh)
5607                return BPF_FIB_LKUP_RET_NO_NEIGH;
5608
5609        return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
5610}
5611#endif
5612
5613BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
5614           struct bpf_fib_lookup *, params, int, plen, u32, flags)
5615{
5616        if (plen < sizeof(*params))
5617                return -EINVAL;
5618
5619        if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5620                return -EINVAL;
5621
5622        switch (params->family) {
5623#if IS_ENABLED(CONFIG_INET)
5624        case AF_INET:
5625                return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
5626                                           flags, true);
5627#endif
5628#if IS_ENABLED(CONFIG_IPV6)
5629        case AF_INET6:
5630                return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
5631                                           flags, true);
5632#endif
5633        }
5634        return -EAFNOSUPPORT;
5635}
5636
5637static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
5638        .func           = bpf_xdp_fib_lookup,
5639        .gpl_only       = true,
5640        .ret_type       = RET_INTEGER,
5641        .arg1_type      = ARG_PTR_TO_CTX,
5642        .arg2_type      = ARG_PTR_TO_MEM,
5643        .arg3_type      = ARG_CONST_SIZE,
5644        .arg4_type      = ARG_ANYTHING,
5645};
5646
5647BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
5648           struct bpf_fib_lookup *, params, int, plen, u32, flags)
5649{
5650        struct net *net = dev_net(skb->dev);
5651        int rc = -EAFNOSUPPORT;
5652        bool check_mtu = false;
5653
5654        if (plen < sizeof(*params))
5655                return -EINVAL;
5656
5657        if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5658                return -EINVAL;
5659
5660        if (params->tot_len)
5661                check_mtu = true;
5662
5663        switch (params->family) {
5664#if IS_ENABLED(CONFIG_INET)
5665        case AF_INET:
5666                rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
5667                break;
5668#endif
5669#if IS_ENABLED(CONFIG_IPV6)
5670        case AF_INET6:
5671                rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
5672                break;
5673#endif
5674        }
5675
5676        if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
5677                struct net_device *dev;
5678
5679                /* When tot_len isn't provided by user, check skb
5680                 * against MTU of FIB lookup resulting net_device
5681                 */
5682                dev = dev_get_by_index_rcu(net, params->ifindex);
5683                if (!is_skb_forwardable(dev, skb))
5684                        rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
5685
5686                params->mtu_result = dev->mtu; /* union with tot_len */
5687        }
5688
5689        return rc;
5690}
5691
5692static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
5693        .func           = bpf_skb_fib_lookup,
5694        .gpl_only       = true,
5695        .ret_type       = RET_INTEGER,
5696        .arg1_type      = ARG_PTR_TO_CTX,
5697        .arg2_type      = ARG_PTR_TO_MEM,
5698        .arg3_type      = ARG_CONST_SIZE,
5699        .arg4_type      = ARG_ANYTHING,
5700};
5701
5702static struct net_device *__dev_via_ifindex(struct net_device *dev_curr,
5703                                            u32 ifindex)
5704{
5705        struct net *netns = dev_net(dev_curr);
5706
5707        /* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */
5708        if (ifindex == 0)
5709                return dev_curr;
5710
5711        return dev_get_by_index_rcu(netns, ifindex);
5712}
5713
5714BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
5715           u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
5716{
5717        int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
5718        struct net_device *dev = skb->dev;
5719        int skb_len, dev_len;
5720        int mtu;
5721
5722        if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
5723                return -EINVAL;
5724
5725        if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
5726                return -EINVAL;
5727
5728        dev = __dev_via_ifindex(dev, ifindex);
5729        if (unlikely(!dev))
5730                return -ENODEV;
5731
5732        mtu = READ_ONCE(dev->mtu);
5733
5734        dev_len = mtu + dev->hard_header_len;
5735
5736        /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
5737        skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len;
5738
5739        skb_len += len_diff; /* minus result pass check */
5740        if (skb_len <= dev_len) {
5741                ret = BPF_MTU_CHK_RET_SUCCESS;
5742                goto out;
5743        }
5744        /* At this point, skb->len exceed MTU, but as it include length of all
5745         * segments, it can still be below MTU.  The SKB can possibly get
5746         * re-segmented in transmit path (see validate_xmit_skb).  Thus, user
5747         * must choose if segs are to be MTU checked.
5748         */
5749        if (skb_is_gso(skb)) {
5750                ret = BPF_MTU_CHK_RET_SUCCESS;
5751
5752                if (flags & BPF_MTU_CHK_SEGS &&
5753                    !skb_gso_validate_network_len(skb, mtu))
5754                        ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
5755        }
5756out:
5757        /* BPF verifier guarantees valid pointer */
5758        *mtu_len = mtu;
5759
5760        return ret;
5761}
5762
5763BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
5764           u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
5765{
5766        struct net_device *dev = xdp->rxq->dev;
5767        int xdp_len = xdp->data_end - xdp->data;
5768        int ret = BPF_MTU_CHK_RET_SUCCESS;
5769        int mtu, dev_len;
5770
5771        /* XDP variant doesn't support multi-buffer segment check (yet) */
5772        if (unlikely(flags))
5773                return -EINVAL;
5774
5775        dev = __dev_via_ifindex(dev, ifindex);
5776        if (unlikely(!dev))
5777                return -ENODEV;
5778
5779        mtu = READ_ONCE(dev->mtu);
5780
5781        /* Add L2-header as dev MTU is L3 size */
5782        dev_len = mtu + dev->hard_header_len;
5783
5784        /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
5785        if (*mtu_len)
5786                xdp_len = *mtu_len + dev->hard_header_len;
5787
5788        xdp_len += len_diff; /* minus result pass check */
5789        if (xdp_len > dev_len)
5790                ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
5791
5792        /* BPF verifier guarantees valid pointer */
5793        *mtu_len = mtu;
5794
5795        return ret;
5796}
5797
5798static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
5799        .func           = bpf_skb_check_mtu,
5800        .gpl_only       = true,
5801        .ret_type       = RET_INTEGER,
5802        .arg1_type      = ARG_PTR_TO_CTX,
5803        .arg2_type      = ARG_ANYTHING,
5804        .arg3_type      = ARG_PTR_TO_INT,
5805        .arg4_type      = ARG_ANYTHING,
5806        .arg5_type      = ARG_ANYTHING,
5807};
5808
5809static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
5810        .func           = bpf_xdp_check_mtu,
5811        .gpl_only       = true,
5812        .ret_type       = RET_INTEGER,
5813        .arg1_type      = ARG_PTR_TO_CTX,
5814        .arg2_type      = ARG_ANYTHING,
5815        .arg3_type      = ARG_PTR_TO_INT,
5816        .arg4_type      = ARG_ANYTHING,
5817        .arg5_type      = ARG_ANYTHING,
5818};
5819
5820#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5821static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
5822{
5823        int err;
5824        struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
5825
5826        if (!seg6_validate_srh(srh, len, false))
5827                return -EINVAL;
5828
5829        switch (type) {
5830        case BPF_LWT_ENCAP_SEG6_INLINE:
5831                if (skb->protocol != htons(ETH_P_IPV6))
5832                        return -EBADMSG;
5833
5834                err = seg6_do_srh_inline(skb, srh);
5835                break;
5836        case BPF_LWT_ENCAP_SEG6:
5837                skb_reset_inner_headers(skb);
5838                skb->encapsulation = 1;
5839                err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
5840                break;
5841        default:
5842                return -EINVAL;
5843        }
5844
5845        bpf_compute_data_pointers(skb);
5846        if (err)
5847                return err;
5848
5849        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5850        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
5851
5852        return seg6_lookup_nexthop(skb, NULL, 0);
5853}
5854#endif /* CONFIG_IPV6_SEG6_BPF */
5855
5856#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5857static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
5858                             bool ingress)
5859{
5860        return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
5861}
5862#endif
5863
5864BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
5865           u32, len)
5866{
5867        switch (type) {
5868#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5869        case BPF_LWT_ENCAP_SEG6:
5870        case BPF_LWT_ENCAP_SEG6_INLINE:
5871                return bpf_push_seg6_encap(skb, type, hdr, len);
5872#endif
5873#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5874        case BPF_LWT_ENCAP_IP:
5875                return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
5876#endif
5877        default:
5878                return -EINVAL;
5879        }
5880}
5881
5882BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
5883           void *, hdr, u32, len)
5884{
5885        switch (type) {
5886#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5887        case BPF_LWT_ENCAP_IP:
5888                return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
5889#endif
5890        default:
5891                return -EINVAL;
5892        }
5893}
5894
5895static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
5896        .func           = bpf_lwt_in_push_encap,
5897        .gpl_only       = false,
5898        .ret_type       = RET_INTEGER,
5899        .arg1_type      = ARG_PTR_TO_CTX,
5900        .arg2_type      = ARG_ANYTHING,
5901        .arg3_type      = ARG_PTR_TO_MEM,
5902        .arg4_type      = ARG_CONST_SIZE
5903};
5904
5905static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
5906        .func           = bpf_lwt_xmit_push_encap,
5907        .gpl_only       = false,
5908        .ret_type       = RET_INTEGER,
5909        .arg1_type      = ARG_PTR_TO_CTX,
5910        .arg2_type      = ARG_ANYTHING,
5911        .arg3_type      = ARG_PTR_TO_MEM,
5912        .arg4_type      = ARG_CONST_SIZE
5913};
5914
5915#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5916BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
5917           const void *, from, u32, len)
5918{
5919        struct seg6_bpf_srh_state *srh_state =
5920                this_cpu_ptr(&seg6_bpf_srh_states);
5921        struct ipv6_sr_hdr *srh = srh_state->srh;
5922        void *srh_tlvs, *srh_end, *ptr;
5923        int srhoff = 0;
5924
5925        if (srh == NULL)
5926                return -EINVAL;
5927
5928        srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
5929        srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
5930
5931        ptr = skb->data + offset;
5932        if (ptr >= srh_tlvs && ptr + len <= srh_end)
5933                srh_state->valid = false;
5934        else if (ptr < (void *)&srh->flags ||
5935                 ptr + len > (void *)&srh->segments)
5936                return -EFAULT;
5937
5938        if (unlikely(bpf_try_make_writable(skb, offset + len)))
5939                return -EFAULT;
5940        if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
5941                return -EINVAL;
5942        srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
5943
5944        memcpy(skb->data + offset, from, len);
5945        return 0;
5946}
5947
5948static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
5949        .func           = bpf_lwt_seg6_store_bytes,
5950        .gpl_only       = false,
5951        .ret_type       = RET_INTEGER,
5952        .arg1_type      = ARG_PTR_TO_CTX,
5953        .arg2_type      = ARG_ANYTHING,
5954        .arg3_type      = ARG_PTR_TO_MEM,
5955        .arg4_type      = ARG_CONST_SIZE
5956};
5957
5958static void bpf_update_srh_state(struct sk_buff *skb)
5959{
5960        struct seg6_bpf_srh_state *srh_state =
5961                this_cpu_ptr(&seg6_bpf_srh_states);
5962        int srhoff = 0;
5963
5964        if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
5965                srh_state->srh = NULL;
5966        } else {
5967                srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
5968                srh_state->hdrlen = srh_state->srh->hdrlen << 3;
5969                srh_state->valid = true;
5970        }
5971}
5972
5973BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
5974           u32, action, void *, param, u32, param_len)
5975{
5976        struct seg6_bpf_srh_state *srh_state =
5977                this_cpu_ptr(&seg6_bpf_srh_states);
5978        int hdroff = 0;
5979        int err;
5980
5981        switch (action) {
5982        case SEG6_LOCAL_ACTION_END_X:
5983                if (!seg6_bpf_has_valid_srh(skb))
5984                        return -EBADMSG;
5985                if (param_len != sizeof(struct in6_addr))
5986                        return -EINVAL;
5987                return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
5988        case SEG6_LOCAL_ACTION_END_T:
5989                if (!seg6_bpf_has_valid_srh(skb))
5990                        return -EBADMSG;
5991                if (param_len != sizeof(int))
5992                        return -EINVAL;
5993                return seg6_lookup_nexthop(skb, NULL, *(int *)param);
5994        case SEG6_LOCAL_ACTION_END_DT6:
5995                if (!seg6_bpf_has_valid_srh(skb))
5996                        return -EBADMSG;
5997                if (param_len != sizeof(int))
5998                        return -EINVAL;
5999
6000                if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
6001                        return -EBADMSG;
6002                if (!pskb_pull(skb, hdroff))
6003                        return -EBADMSG;
6004
6005                skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
6006                skb_reset_network_header(skb);
6007                skb_reset_transport_header(skb);
6008                skb->encapsulation = 0;
6009
6010                bpf_compute_data_pointers(skb);
6011                bpf_update_srh_state(skb);
6012                return seg6_lookup_nexthop(skb, NULL, *(int *)param);
6013        case SEG6_LOCAL_ACTION_END_B6:
6014                if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6015                        return -EBADMSG;
6016                err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
6017                                          param, param_len);
6018                if (!err)
6019                        bpf_update_srh_state(skb);
6020
6021                return err;
6022        case SEG6_LOCAL_ACTION_END_B6_ENCAP:
6023                if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6024                        return -EBADMSG;
6025                err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
6026                                          param, param_len);
6027                if (!err)
6028                        bpf_update_srh_state(skb);
6029
6030                return err;
6031        default:
6032                return -EINVAL;
6033        }
6034}
6035
6036static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
6037        .func           = bpf_lwt_seg6_action,
6038        .gpl_only       = false,
6039        .ret_type       = RET_INTEGER,
6040        .arg1_type      = ARG_PTR_TO_CTX,
6041        .arg2_type      = ARG_ANYTHING,
6042        .arg3_type      = ARG_PTR_TO_MEM,
6043        .arg4_type      = ARG_CONST_SIZE
6044};
6045
6046BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
6047           s32, len)
6048{
6049        struct seg6_bpf_srh_state *srh_state =
6050                this_cpu_ptr(&seg6_bpf_srh_states);
6051        struct ipv6_sr_hdr *srh = srh_state->srh;
6052        void *srh_end, *srh_tlvs, *ptr;
6053        struct ipv6hdr *hdr;
6054        int srhoff = 0;
6055        int ret;
6056
6057        if (unlikely(srh == NULL))
6058                return -EINVAL;
6059
6060        srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
6061                        ((srh->first_segment + 1) << 4));
6062        srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
6063                        srh_state->hdrlen);
6064        ptr = skb->data + offset;
6065
6066        if (unlikely(ptr < srh_tlvs || ptr > srh_end))
6067                return -EFAULT;
6068        if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
6069                return -EFAULT;
6070
6071        if (len > 0) {
6072                ret = skb_cow_head(skb, len);
6073                if (unlikely(ret < 0))
6074                        return ret;
6075
6076                ret = bpf_skb_net_hdr_push(skb, offset, len);
6077        } else {
6078                ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
6079        }
6080
6081        bpf_compute_data_pointers(skb);
6082        if (unlikely(ret < 0))
6083                return ret;
6084
6085        hdr = (struct ipv6hdr *)skb->data;
6086        hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
6087
6088        if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
6089                return -EINVAL;
6090        srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6091        srh_state->hdrlen += len;
6092        srh_state->valid = false;
6093        return 0;
6094}
6095
6096static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
6097        .func           = bpf_lwt_seg6_adjust_srh,
6098        .gpl_only       = false,
6099        .ret_type       = RET_INTEGER,
6100        .arg1_type      = ARG_PTR_TO_CTX,
6101        .arg2_type      = ARG_ANYTHING,
6102        .arg3_type      = ARG_ANYTHING,
6103};
6104#endif /* CONFIG_IPV6_SEG6_BPF */
6105
6106#ifdef CONFIG_INET
6107static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
6108                              int dif, int sdif, u8 family, u8 proto)
6109{
6110        bool refcounted = false;
6111        struct sock *sk = NULL;
6112
6113        if (family == AF_INET) {
6114                __be32 src4 = tuple->ipv4.saddr;
6115                __be32 dst4 = tuple->ipv4.daddr;
6116
6117                if (proto == IPPROTO_TCP)
6118                        sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
6119                                           src4, tuple->ipv4.sport,
6120                                           dst4, tuple->ipv4.dport,
6121                                           dif, sdif, &refcounted);
6122                else
6123                        sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
6124                                               dst4, tuple->ipv4.dport,
6125                                               dif, sdif, &udp_table, NULL);
6126#if IS_ENABLED(CONFIG_IPV6)
6127        } else {
6128                struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
6129                struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
6130
6131                if (proto == IPPROTO_TCP)
6132                        sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
6133                                            src6, tuple->ipv6.sport,
6134                                            dst6, ntohs(tuple->ipv6.dport),
6135                                            dif, sdif, &refcounted);
6136                else if (likely(ipv6_bpf_stub))
6137                        sk = ipv6_bpf_stub->udp6_lib_lookup(net,
6138                                                            src6, tuple->ipv6.sport,
6139                                                            dst6, tuple->ipv6.dport,
6140                                                            dif, sdif,
6141                                                            &udp_table, NULL);
6142#endif
6143        }
6144
6145        if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
6146                WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
6147                sk = NULL;
6148        }
6149        return sk;
6150}
6151
6152/* bpf_skc_lookup performs the core lookup for different types of sockets,
6153 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
6154 * Returns the socket as an 'unsigned long' to simplify the casting in the
6155 * callers to satisfy BPF_CALL declarations.
6156 */
6157static struct sock *
6158__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6159                 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6160                 u64 flags)
6161{
6162        struct sock *sk = NULL;
6163        u8 family = AF_UNSPEC;
6164        struct net *net;
6165        int sdif;
6166
6167        if (len == sizeof(tuple->ipv4))
6168                family = AF_INET;
6169        else if (len == sizeof(tuple->ipv6))
6170                family = AF_INET6;
6171        else
6172                return NULL;
6173
6174        if (unlikely(family == AF_UNSPEC || flags ||
6175                     !((s32)netns_id < 0 || netns_id <= S32_MAX)))
6176                goto out;
6177
6178        if (family == AF_INET)
6179                sdif = inet_sdif(skb);
6180        else
6181                sdif = inet6_sdif(skb);
6182
6183        if ((s32)netns_id < 0) {
6184                net = caller_net;
6185                sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6186        } else {
6187                net = get_net_ns_by_id(caller_net, netns_id);
6188                if (unlikely(!net))
6189                        goto out;
6190                sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6191                put_net(net);
6192        }
6193
6194out:
6195        return sk;
6196}
6197
6198static struct sock *
6199__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6200                struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6201                u64 flags)
6202{
6203        struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
6204                                           ifindex, proto, netns_id, flags);
6205
6206        if (sk) {
6207                sk = sk_to_full_sk(sk);
6208                if (!sk_fullsock(sk)) {
6209                        sock_gen_put(sk);
6210                        return NULL;
6211                }
6212        }
6213
6214        return sk;
6215}
6216
6217static struct sock *
6218bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6219               u8 proto, u64 netns_id, u64 flags)
6220{
6221        struct net *caller_net;
6222        int ifindex;
6223
6224        if (skb->dev) {
6225                caller_net = dev_net(skb->dev);
6226                ifindex = skb->dev->ifindex;
6227        } else {
6228                caller_net = sock_net(skb->sk);
6229                ifindex = 0;
6230        }
6231
6232        return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
6233                                netns_id, flags);
6234}
6235
6236static struct sock *
6237bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6238              u8 proto, u64 netns_id, u64 flags)
6239{
6240        struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
6241                                         flags);
6242
6243        if (sk) {
6244                sk = sk_to_full_sk(sk);
6245                if (!sk_fullsock(sk)) {
6246                        sock_gen_put(sk);
6247                        return NULL;
6248                }
6249        }
6250
6251        return sk;
6252}
6253
6254BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
6255           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6256{
6257        return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
6258                                             netns_id, flags);
6259}
6260
6261static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
6262        .func           = bpf_skc_lookup_tcp,
6263        .gpl_only       = false,
6264        .pkt_access     = true,
6265        .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6266        .arg1_type      = ARG_PTR_TO_CTX,
6267        .arg2_type      = ARG_PTR_TO_MEM,
6268        .arg3_type      = ARG_CONST_SIZE,
6269        .arg4_type      = ARG_ANYTHING,
6270        .arg5_type      = ARG_ANYTHING,
6271};
6272
6273BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
6274           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6275{
6276        return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
6277                                            netns_id, flags);
6278}
6279
6280static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
6281        .func           = bpf_sk_lookup_tcp,
6282        .gpl_only       = false,
6283        .pkt_access     = true,
6284        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6285        .arg1_type      = ARG_PTR_TO_CTX,
6286        .arg2_type      = ARG_PTR_TO_MEM,
6287        .arg3_type      = ARG_CONST_SIZE,
6288        .arg4_type      = ARG_ANYTHING,
6289        .arg5_type      = ARG_ANYTHING,
6290};
6291
6292BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
6293           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6294{
6295        return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
6296                                            netns_id, flags);
6297}
6298
6299static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
6300        .func           = bpf_sk_lookup_udp,
6301        .gpl_only       = false,
6302        .pkt_access     = true,
6303        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6304        .arg1_type      = ARG_PTR_TO_CTX,
6305        .arg2_type      = ARG_PTR_TO_MEM,
6306        .arg3_type      = ARG_CONST_SIZE,
6307        .arg4_type      = ARG_ANYTHING,
6308        .arg5_type      = ARG_ANYTHING,
6309};
6310
6311BPF_CALL_1(bpf_sk_release, struct sock *, sk)
6312{
6313        if (sk && sk_is_refcounted(sk))
6314                sock_gen_put(sk);
6315        return 0;
6316}
6317
6318static const struct bpf_func_proto bpf_sk_release_proto = {
6319        .func           = bpf_sk_release,
6320        .gpl_only       = false,
6321        .ret_type       = RET_INTEGER,
6322        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6323};
6324
6325BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
6326           struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6327{
6328        struct net *caller_net = dev_net(ctx->rxq->dev);
6329        int ifindex = ctx->rxq->dev->ifindex;
6330
6331        return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6332                                              ifindex, IPPROTO_UDP, netns_id,
6333                                              flags);
6334}
6335
6336static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
6337        .func           = bpf_xdp_sk_lookup_udp,
6338        .gpl_only       = false,
6339        .pkt_access     = true,
6340        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6341        .arg1_type      = ARG_PTR_TO_CTX,
6342        .arg2_type      = ARG_PTR_TO_MEM,
6343        .arg3_type      = ARG_CONST_SIZE,
6344        .arg4_type      = ARG_ANYTHING,
6345        .arg5_type      = ARG_ANYTHING,
6346};
6347
6348BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
6349           struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6350{
6351        struct net *caller_net = dev_net(ctx->rxq->dev);
6352        int ifindex = ctx->rxq->dev->ifindex;
6353
6354        return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
6355                                               ifindex, IPPROTO_TCP, netns_id,
6356                                               flags);
6357}
6358
6359static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
6360        .func           = bpf_xdp_skc_lookup_tcp,
6361        .gpl_only       = false,
6362        .pkt_access     = true,
6363        .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6364        .arg1_type      = ARG_PTR_TO_CTX,
6365        .arg2_type      = ARG_PTR_TO_MEM,
6366        .arg3_type      = ARG_CONST_SIZE,
6367        .arg4_type      = ARG_ANYTHING,
6368        .arg5_type      = ARG_ANYTHING,
6369};
6370
6371BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
6372           struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6373{
6374        struct net *caller_net = dev_net(ctx->rxq->dev);
6375        int ifindex = ctx->rxq->dev->ifindex;
6376
6377        return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6378                                              ifindex, IPPROTO_TCP, netns_id,
6379                                              flags);
6380}
6381
6382static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
6383        .func           = bpf_xdp_sk_lookup_tcp,
6384        .gpl_only       = false,
6385        .pkt_access     = true,
6386        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6387        .arg1_type      = ARG_PTR_TO_CTX,
6388        .arg2_type      = ARG_PTR_TO_MEM,
6389        .arg3_type      = ARG_CONST_SIZE,
6390        .arg4_type      = ARG_ANYTHING,
6391        .arg5_type      = ARG_ANYTHING,
6392};
6393
6394BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6395           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6396{
6397        return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
6398                                               sock_net(ctx->sk), 0,
6399                                               IPPROTO_TCP, netns_id, flags);
6400}
6401
6402static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
6403        .func           = bpf_sock_addr_skc_lookup_tcp,
6404        .gpl_only       = false,
6405        .ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6406        .arg1_type      = ARG_PTR_TO_CTX,
6407        .arg2_type      = ARG_PTR_TO_MEM,
6408        .arg3_type      = ARG_CONST_SIZE,
6409        .arg4_type      = ARG_ANYTHING,
6410        .arg5_type      = ARG_ANYTHING,
6411};
6412
6413BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6414           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6415{
6416        return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6417                                              sock_net(ctx->sk), 0, IPPROTO_TCP,
6418                                              netns_id, flags);
6419}
6420
6421static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
6422        .func           = bpf_sock_addr_sk_lookup_tcp,
6423        .gpl_only       = false,
6424        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6425        .arg1_type      = ARG_PTR_TO_CTX,
6426        .arg2_type      = ARG_PTR_TO_MEM,
6427        .arg3_type      = ARG_CONST_SIZE,
6428        .arg4_type      = ARG_ANYTHING,
6429        .arg5_type      = ARG_ANYTHING,
6430};
6431
6432BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
6433           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6434{
6435        return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6436                                              sock_net(ctx->sk), 0, IPPROTO_UDP,
6437                                              netns_id, flags);
6438}
6439
6440static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
6441        .func           = bpf_sock_addr_sk_lookup_udp,
6442        .gpl_only       = false,
6443        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6444        .arg1_type      = ARG_PTR_TO_CTX,
6445        .arg2_type      = ARG_PTR_TO_MEM,
6446        .arg3_type      = ARG_CONST_SIZE,
6447        .arg4_type      = ARG_ANYTHING,
6448        .arg5_type      = ARG_ANYTHING,
6449};
6450
6451bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6452                                  struct bpf_insn_access_aux *info)
6453{
6454        if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
6455                                          icsk_retransmits))
6456                return false;
6457
6458        if (off % size != 0)
6459                return false;
6460
6461        switch (off) {
6462        case offsetof(struct bpf_tcp_sock, bytes_received):
6463        case offsetof(struct bpf_tcp_sock, bytes_acked):
6464                return size == sizeof(__u64);
6465        default:
6466                return size == sizeof(__u32);
6467        }
6468}
6469
6470u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
6471                                    const struct bpf_insn *si,
6472                                    struct bpf_insn *insn_buf,
6473                                    struct bpf_prog *prog, u32 *target_size)
6474{
6475        struct bpf_insn *insn = insn_buf;
6476
6477#define BPF_TCP_SOCK_GET_COMMON(FIELD)                                  \
6478        do {                                                            \
6479                BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) >     \
6480                             sizeof_field(struct bpf_tcp_sock, FIELD)); \
6481                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
6482                                      si->dst_reg, si->src_reg,         \
6483                                      offsetof(struct tcp_sock, FIELD)); \
6484        } while (0)
6485
6486#define BPF_INET_SOCK_GET_COMMON(FIELD)                                 \
6487        do {                                                            \
6488                BUILD_BUG_ON(sizeof_field(struct inet_connection_sock,  \
6489                                          FIELD) >                      \
6490                             sizeof_field(struct bpf_tcp_sock, FIELD)); \
6491                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                 \
6492                                        struct inet_connection_sock,    \
6493                                        FIELD),                         \
6494                                      si->dst_reg, si->src_reg,         \
6495                                      offsetof(                         \
6496                                        struct inet_connection_sock,    \
6497                                        FIELD));                        \
6498        } while (0)
6499
6500        if (insn > insn_buf)
6501                return insn - insn_buf;
6502
6503        switch (si->off) {
6504        case offsetof(struct bpf_tcp_sock, rtt_min):
6505                BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
6506                             sizeof(struct minmax));
6507                BUILD_BUG_ON(sizeof(struct minmax) <
6508                             sizeof(struct minmax_sample));
6509
6510                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
6511                                      offsetof(struct tcp_sock, rtt_min) +
6512                                      offsetof(struct minmax_sample, v));
6513                break;
6514        case offsetof(struct bpf_tcp_sock, snd_cwnd):
6515                BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
6516                break;
6517        case offsetof(struct bpf_tcp_sock, srtt_us):
6518                BPF_TCP_SOCK_GET_COMMON(srtt_us);
6519                break;
6520        case offsetof(struct bpf_tcp_sock, snd_ssthresh):
6521                BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
6522                break;
6523        case offsetof(struct bpf_tcp_sock, rcv_nxt):
6524                BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
6525                break;
6526        case offsetof(struct bpf_tcp_sock, snd_nxt):
6527                BPF_TCP_SOCK_GET_COMMON(snd_nxt);
6528                break;
6529        case offsetof(struct bpf_tcp_sock, snd_una):
6530                BPF_TCP_SOCK_GET_COMMON(snd_una);
6531                break;
6532        case offsetof(struct bpf_tcp_sock, mss_cache):
6533                BPF_TCP_SOCK_GET_COMMON(mss_cache);
6534                break;
6535        case offsetof(struct bpf_tcp_sock, ecn_flags):
6536                BPF_TCP_SOCK_GET_COMMON(ecn_flags);
6537                break;
6538        case offsetof(struct bpf_tcp_sock, rate_delivered):
6539                BPF_TCP_SOCK_GET_COMMON(rate_delivered);
6540                break;
6541        case offsetof(struct bpf_tcp_sock, rate_interval_us):
6542                BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
6543                break;
6544        case offsetof(struct bpf_tcp_sock, packets_out):
6545                BPF_TCP_SOCK_GET_COMMON(packets_out);
6546                break;
6547        case offsetof(struct bpf_tcp_sock, retrans_out):
6548                BPF_TCP_SOCK_GET_COMMON(retrans_out);
6549                break;
6550        case offsetof(struct bpf_tcp_sock, total_retrans):
6551                BPF_TCP_SOCK_GET_COMMON(total_retrans);
6552                break;
6553        case offsetof(struct bpf_tcp_sock, segs_in):
6554                BPF_TCP_SOCK_GET_COMMON(segs_in);
6555                break;
6556        case offsetof(struct bpf_tcp_sock, data_segs_in):
6557                BPF_TCP_SOCK_GET_COMMON(data_segs_in);
6558                break;
6559        case offsetof(struct bpf_tcp_sock, segs_out):
6560                BPF_TCP_SOCK_GET_COMMON(segs_out);
6561                break;
6562        case offsetof(struct bpf_tcp_sock, data_segs_out):
6563                BPF_TCP_SOCK_GET_COMMON(data_segs_out);
6564                break;
6565        case offsetof(struct bpf_tcp_sock, lost_out):
6566                BPF_TCP_SOCK_GET_COMMON(lost_out);
6567                break;
6568        case offsetof(struct bpf_tcp_sock, sacked_out):
6569                BPF_TCP_SOCK_GET_COMMON(sacked_out);
6570                break;
6571        case offsetof(struct bpf_tcp_sock, bytes_received):
6572                BPF_TCP_SOCK_GET_COMMON(bytes_received);
6573                break;
6574        case offsetof(struct bpf_tcp_sock, bytes_acked):
6575                BPF_TCP_SOCK_GET_COMMON(bytes_acked);
6576                break;
6577        case offsetof(struct bpf_tcp_sock, dsack_dups):
6578                BPF_TCP_SOCK_GET_COMMON(dsack_dups);
6579                break;
6580        case offsetof(struct bpf_tcp_sock, delivered):
6581                BPF_TCP_SOCK_GET_COMMON(delivered);
6582                break;
6583        case offsetof(struct bpf_tcp_sock, delivered_ce):
6584                BPF_TCP_SOCK_GET_COMMON(delivered_ce);
6585                break;
6586        case offsetof(struct bpf_tcp_sock, icsk_retransmits):
6587                BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
6588                break;
6589        }
6590
6591        return insn - insn_buf;
6592}
6593
6594BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
6595{
6596        if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
6597                return (unsigned long)sk;
6598
6599        return (unsigned long)NULL;
6600}
6601
6602const struct bpf_func_proto bpf_tcp_sock_proto = {
6603        .func           = bpf_tcp_sock,
6604        .gpl_only       = false,
6605        .ret_type       = RET_PTR_TO_TCP_SOCK_OR_NULL,
6606        .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
6607};
6608
6609BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
6610{
6611        sk = sk_to_full_sk(sk);
6612
6613        if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
6614                return (unsigned long)sk;
6615
6616        return (unsigned long)NULL;
6617}
6618
6619static const struct bpf_func_proto bpf_get_listener_sock_proto = {
6620        .func           = bpf_get_listener_sock,
6621        .gpl_only       = false,
6622        .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6623        .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
6624};
6625
6626BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
6627{
6628        unsigned int iphdr_len;
6629
6630        switch (skb_protocol(skb, true)) {
6631        case cpu_to_be16(ETH_P_IP):
6632                iphdr_len = sizeof(struct iphdr);
6633                break;
6634        case cpu_to_be16(ETH_P_IPV6):
6635                iphdr_len = sizeof(struct ipv6hdr);
6636                break;
6637        default:
6638                return 0;
6639        }
6640
6641        if (skb_headlen(skb) < iphdr_len)
6642                return 0;
6643
6644        if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
6645                return 0;
6646
6647        return INET_ECN_set_ce(skb);
6648}
6649
6650bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6651                                  struct bpf_insn_access_aux *info)
6652{
6653        if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
6654                return false;
6655
6656        if (off % size != 0)
6657                return false;
6658
6659        switch (off) {
6660        default:
6661                return size == sizeof(__u32);
6662        }
6663}
6664
6665u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
6666                                    const struct bpf_insn *si,
6667                                    struct bpf_insn *insn_buf,
6668                                    struct bpf_prog *prog, u32 *target_size)
6669{
6670        struct bpf_insn *insn = insn_buf;
6671
6672#define BPF_XDP_SOCK_GET(FIELD)                                         \
6673        do {                                                            \
6674                BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) >     \
6675                             sizeof_field(struct bpf_xdp_sock, FIELD)); \
6676                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
6677                                      si->dst_reg, si->src_reg,         \
6678                                      offsetof(struct xdp_sock, FIELD)); \
6679        } while (0)
6680
6681        switch (si->off) {
6682        case offsetof(struct bpf_xdp_sock, queue_id):
6683                BPF_XDP_SOCK_GET(queue_id);
6684                break;
6685        }
6686
6687        return insn - insn_buf;
6688}
6689
6690static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
6691        .func           = bpf_skb_ecn_set_ce,
6692        .gpl_only       = false,
6693        .ret_type       = RET_INTEGER,
6694        .arg1_type      = ARG_PTR_TO_CTX,
6695};
6696
6697BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6698           struct tcphdr *, th, u32, th_len)
6699{
6700#ifdef CONFIG_SYN_COOKIES
6701        u32 cookie;
6702        int ret;
6703
6704        if (unlikely(!sk || th_len < sizeof(*th)))
6705                return -EINVAL;
6706
6707        /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
6708        if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6709                return -EINVAL;
6710
6711        if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6712                return -EINVAL;
6713
6714        if (!th->ack || th->rst || th->syn)
6715                return -ENOENT;
6716
6717        if (tcp_synq_no_recent_overflow(sk))
6718                return -ENOENT;
6719
6720        cookie = ntohl(th->ack_seq) - 1;
6721
6722        switch (sk->sk_family) {
6723        case AF_INET:
6724                if (unlikely(iph_len < sizeof(struct iphdr)))
6725                        return -EINVAL;
6726
6727                ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
6728                break;
6729
6730#if IS_BUILTIN(CONFIG_IPV6)
6731        case AF_INET6:
6732                if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6733                        return -EINVAL;
6734
6735                ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
6736                break;
6737#endif /* CONFIG_IPV6 */
6738
6739        default:
6740                return -EPROTONOSUPPORT;
6741        }
6742
6743        if (ret > 0)
6744                return 0;
6745
6746        return -ENOENT;
6747#else
6748        return -ENOTSUPP;
6749#endif
6750}
6751
6752static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
6753        .func           = bpf_tcp_check_syncookie,
6754        .gpl_only       = true,
6755        .pkt_access     = true,
6756        .ret_type       = RET_INTEGER,
6757        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6758        .arg2_type      = ARG_PTR_TO_MEM,
6759        .arg3_type      = ARG_CONST_SIZE,
6760        .arg4_type      = ARG_PTR_TO_MEM,
6761        .arg5_type      = ARG_CONST_SIZE,
6762};
6763
6764BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6765           struct tcphdr *, th, u32, th_len)
6766{
6767#ifdef CONFIG_SYN_COOKIES
6768        u32 cookie;
6769        u16 mss;
6770
6771        if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
6772                return -EINVAL;
6773
6774        if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6775                return -EINVAL;
6776
6777        if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6778                return -ENOENT;
6779
6780        if (!th->syn || th->ack || th->fin || th->rst)
6781                return -EINVAL;
6782
6783        if (unlikely(iph_len < sizeof(struct iphdr)))
6784                return -EINVAL;
6785
6786        /* Both struct iphdr and struct ipv6hdr have the version field at the
6787         * same offset so we can cast to the shorter header (struct iphdr).
6788         */
6789        switch (((struct iphdr *)iph)->version) {
6790        case 4:
6791                if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
6792                        return -EINVAL;
6793
6794                mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
6795                break;
6796
6797#if IS_BUILTIN(CONFIG_IPV6)
6798        case 6:
6799                if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6800                        return -EINVAL;
6801
6802                if (sk->sk_family != AF_INET6)
6803                        return -EINVAL;
6804
6805                mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
6806                break;
6807#endif /* CONFIG_IPV6 */
6808
6809        default:
6810                return -EPROTONOSUPPORT;
6811        }
6812        if (mss == 0)
6813                return -ENOENT;
6814
6815        return cookie | ((u64)mss << 32);
6816#else
6817        return -EOPNOTSUPP;
6818#endif /* CONFIG_SYN_COOKIES */
6819}
6820
6821static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
6822        .func           = bpf_tcp_gen_syncookie,
6823        .gpl_only       = true, /* __cookie_v*_init_sequence() is GPL */
6824        .pkt_access     = true,
6825        .ret_type       = RET_INTEGER,
6826        .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6827        .arg2_type      = ARG_PTR_TO_MEM,
6828        .arg3_type      = ARG_CONST_SIZE,
6829        .arg4_type      = ARG_PTR_TO_MEM,
6830        .arg5_type      = ARG_CONST_SIZE,
6831};
6832
6833BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
6834{
6835        if (!sk || flags != 0)
6836                return -EINVAL;
6837        if (!skb_at_tc_ingress(skb))
6838                return -EOPNOTSUPP;
6839        if (unlikely(dev_net(skb->dev) != sock_net(sk)))
6840                return -ENETUNREACH;
6841        if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
6842                return -ESOCKTNOSUPPORT;
6843        if (sk_is_refcounted(sk) &&
6844            unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
6845                return -ENOENT;
6846
6847        skb_orphan(skb);
6848        skb->sk = sk;
6849        skb->destructor = sock_pfree;
6850
6851        return 0;
6852}
6853
6854static const struct bpf_func_proto bpf_sk_assign_proto = {
6855        .func           = bpf_sk_assign,
6856        .gpl_only       = false,
6857        .ret_type       = RET_INTEGER,
6858        .arg1_type      = ARG_PTR_TO_CTX,
6859        .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6860        .arg3_type      = ARG_ANYTHING,
6861};
6862
6863static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
6864                                    u8 search_kind, const u8 *magic,
6865                                    u8 magic_len, bool *eol)
6866{
6867        u8 kind, kind_len;
6868
6869        *eol = false;
6870
6871        while (op < opend) {
6872                kind = op[0];
6873
6874                if (kind == TCPOPT_EOL) {
6875                        *eol = true;
6876                        return ERR_PTR(-ENOMSG);
6877                } else if (kind == TCPOPT_NOP) {
6878                        op++;
6879                        continue;
6880                }
6881
6882                if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
6883                        /* Something is wrong in the received header.
6884                         * Follow the TCP stack's tcp_parse_options()
6885                         * and just bail here.
6886                         */
6887                        return ERR_PTR(-EFAULT);
6888
6889                kind_len = op[1];
6890                if (search_kind == kind) {
6891                        if (!magic_len)
6892                                return op;
6893
6894                        if (magic_len > kind_len - 2)
6895                                return ERR_PTR(-ENOMSG);
6896
6897                        if (!memcmp(&op[2], magic, magic_len))
6898                                return op;
6899                }
6900
6901                op += kind_len;
6902        }
6903
6904        return ERR_PTR(-ENOMSG);
6905}
6906
6907BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
6908           void *, search_res, u32, len, u64, flags)
6909{
6910        bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
6911        const u8 *op, *opend, *magic, *search = search_res;
6912        u8 search_kind, search_len, copy_len, magic_len;
6913        int ret;
6914
6915        /* 2 byte is the minimal option len except TCPOPT_NOP and
6916         * TCPOPT_EOL which are useless for the bpf prog to learn
6917         * and this helper disallow loading them also.
6918         */
6919        if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
6920                return -EINVAL;
6921
6922        search_kind = search[0];
6923        search_len = search[1];
6924
6925        if (search_len > len || search_kind == TCPOPT_NOP ||
6926            search_kind == TCPOPT_EOL)
6927                return -EINVAL;
6928
6929        if (search_kind == TCPOPT_EXP || search_kind == 253) {
6930                /* 16 or 32 bit magic.  +2 for kind and kind length */
6931                if (search_len != 4 && search_len != 6)
6932                        return -EINVAL;
6933                magic = &search[2];
6934                magic_len = search_len - 2;
6935        } else {
6936                if (search_len)
6937                        return -EINVAL;
6938                magic = NULL;
6939                magic_len = 0;
6940        }
6941
6942        if (load_syn) {
6943                ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
6944                if (ret < 0)
6945                        return ret;
6946
6947                opend = op + ret;
6948                op += sizeof(struct tcphdr);
6949        } else {
6950                if (!bpf_sock->skb ||
6951                    bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
6952                        /* This bpf_sock->op cannot call this helper */
6953                        return -EPERM;
6954
6955                opend = bpf_sock->skb_data_end;
6956                op = bpf_sock->skb->data + sizeof(struct tcphdr);
6957        }
6958
6959        op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
6960                                &eol);
6961        if (IS_ERR(op))
6962                return PTR_ERR(op);
6963
6964        copy_len = op[1];
6965        ret = copy_len;
6966        if (copy_len > len) {
6967                ret = -ENOSPC;
6968                copy_len = len;
6969        }
6970
6971        memcpy(search_res, op, copy_len);
6972        return ret;
6973}
6974
6975static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
6976        .func           = bpf_sock_ops_load_hdr_opt,
6977        .gpl_only       = false,
6978        .ret_type       = RET_INTEGER,
6979        .arg1_type      = ARG_PTR_TO_CTX,
6980        .arg2_type      = ARG_PTR_TO_MEM,
6981        .arg3_type      = ARG_CONST_SIZE,
6982        .arg4_type      = ARG_ANYTHING,
6983};
6984
6985BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
6986           const void *, from, u32, len, u64, flags)
6987{
6988        u8 new_kind, new_kind_len, magic_len = 0, *opend;
6989        const u8 *op, *new_op, *magic = NULL;
6990        struct sk_buff *skb;
6991        bool eol;
6992
6993        if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
6994                return -EPERM;
6995
6996        if (len < 2 || flags)
6997                return -EINVAL;
6998
6999        new_op = from;
7000        new_kind = new_op[0];
7001        new_kind_len = new_op[1];
7002
7003        if (new_kind_len > len || new_kind == TCPOPT_NOP ||
7004            new_kind == TCPOPT_EOL)
7005                return -EINVAL;
7006
7007        if (new_kind_len > bpf_sock->remaining_opt_len)
7008                return -ENOSPC;
7009
7010        /* 253 is another experimental kind */
7011        if (new_kind == TCPOPT_EXP || new_kind == 253)  {
7012                if (new_kind_len < 4)
7013                        return -EINVAL;
7014                /* Match for the 2 byte magic also.
7015                 * RFC 6994: the magic could be 2 or 4 bytes.
7016                 * Hence, matching by 2 byte only is on the
7017                 * conservative side but it is the right
7018                 * thing to do for the 'search-for-duplication'
7019                 * purpose.
7020                 */
7021                magic = &new_op[2];
7022                magic_len = 2;
7023        }
7024
7025        /* Check for duplication */
7026        skb = bpf_sock->skb;
7027        op = skb->data + sizeof(struct tcphdr);
7028        opend = bpf_sock->skb_data_end;
7029
7030        op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
7031                                &eol);
7032        if (!IS_ERR(op))
7033                return -EEXIST;
7034
7035        if (PTR_ERR(op) != -ENOMSG)
7036                return PTR_ERR(op);
7037
7038        if (eol)
7039                /* The option has been ended.  Treat it as no more
7040                 * header option can be written.
7041                 */
7042                return -ENOSPC;
7043
7044        /* No duplication found.  Store the header option. */
7045        memcpy(opend, from, new_kind_len);
7046
7047        bpf_sock->remaining_opt_len -= new_kind_len;
7048        bpf_sock->skb_data_end += new_kind_len;
7049
7050        return 0;
7051}
7052
7053static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
7054        .func           = bpf_sock_ops_store_hdr_opt,
7055        .gpl_only       = false,
7056        .ret_type       = RET_INTEGER,
7057        .arg1_type      = ARG_PTR_TO_CTX,
7058        .arg2_type      = ARG_PTR_TO_MEM,
7059        .arg3_type      = ARG_CONST_SIZE,
7060        .arg4_type      = ARG_ANYTHING,
7061};
7062
7063BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7064           u32, len, u64, flags)
7065{
7066        if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7067                return -EPERM;
7068
7069        if (flags || len < 2)
7070                return -EINVAL;
7071
7072        if (len > bpf_sock->remaining_opt_len)
7073                return -ENOSPC;
7074
7075        bpf_sock->remaining_opt_len -= len;
7076
7077        return 0;
7078}
7079
7080static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
7081        .func           = bpf_sock_ops_reserve_hdr_opt,
7082        .gpl_only       = false,
7083        .ret_type       = RET_INTEGER,
7084        .arg1_type      = ARG_PTR_TO_CTX,
7085        .arg2_type      = ARG_ANYTHING,
7086        .arg3_type      = ARG_ANYTHING,
7087};
7088
7089#endif /* CONFIG_INET */
7090
7091bool bpf_helper_changes_pkt_data(void *func)
7092{
7093        if (func == bpf_skb_vlan_push ||
7094            func == bpf_skb_vlan_pop ||
7095            func == bpf_skb_store_bytes ||
7096            func == bpf_skb_change_proto ||
7097            func == bpf_skb_change_head ||
7098            func == sk_skb_change_head ||
7099            func == bpf_skb_change_tail ||
7100            func == sk_skb_change_tail ||
7101            func == bpf_skb_adjust_room ||
7102            func == sk_skb_adjust_room ||
7103            func == bpf_skb_pull_data ||
7104            func == sk_skb_pull_data ||
7105            func == bpf_clone_redirect ||
7106            func == bpf_l3_csum_replace ||
7107            func == bpf_l4_csum_replace ||
7108            func == bpf_xdp_adjust_head ||
7109            func == bpf_xdp_adjust_meta ||
7110            func == bpf_msg_pull_data ||
7111            func == bpf_msg_push_data ||
7112            func == bpf_msg_pop_data ||
7113            func == bpf_xdp_adjust_tail ||
7114#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
7115            func == bpf_lwt_seg6_store_bytes ||
7116            func == bpf_lwt_seg6_adjust_srh ||
7117            func == bpf_lwt_seg6_action ||
7118#endif
7119#ifdef CONFIG_INET
7120            func == bpf_sock_ops_store_hdr_opt ||
7121#endif
7122            func == bpf_lwt_in_push_encap ||
7123            func == bpf_lwt_xmit_push_encap)
7124                return true;
7125
7126        return false;
7127}
7128
7129const struct bpf_func_proto bpf_event_output_data_proto __weak;
7130const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
7131
7132static const struct bpf_func_proto *
7133sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7134{
7135        switch (func_id) {
7136        /* inet and inet6 sockets are created in a process
7137         * context so there is always a valid uid/gid
7138         */
7139        case BPF_FUNC_get_current_uid_gid:
7140                return &bpf_get_current_uid_gid_proto;
7141        case BPF_FUNC_get_local_storage:
7142                return &bpf_get_local_storage_proto;
7143        case BPF_FUNC_get_socket_cookie:
7144                return &bpf_get_socket_cookie_sock_proto;
7145        case BPF_FUNC_get_netns_cookie:
7146                return &bpf_get_netns_cookie_sock_proto;
7147        case BPF_FUNC_perf_event_output:
7148                return &bpf_event_output_data_proto;
7149        case BPF_FUNC_get_current_pid_tgid:
7150                return &bpf_get_current_pid_tgid_proto;
7151        case BPF_FUNC_get_current_comm:
7152                return &bpf_get_current_comm_proto;
7153#ifdef CONFIG_CGROUPS
7154        case BPF_FUNC_get_current_cgroup_id:
7155                return &bpf_get_current_cgroup_id_proto;
7156        case BPF_FUNC_get_current_ancestor_cgroup_id:
7157                return &bpf_get_current_ancestor_cgroup_id_proto;
7158#endif
7159#ifdef CONFIG_CGROUP_NET_CLASSID
7160        case BPF_FUNC_get_cgroup_classid:
7161                return &bpf_get_cgroup_classid_curr_proto;
7162#endif
7163        case BPF_FUNC_sk_storage_get:
7164                return &bpf_sk_storage_get_cg_sock_proto;
7165        default:
7166                return bpf_base_func_proto(func_id);
7167        }
7168}
7169
7170static const struct bpf_func_proto *
7171sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7172{
7173        switch (func_id) {
7174        /* inet and inet6 sockets are created in a process
7175         * context so there is always a valid uid/gid
7176         */
7177        case BPF_FUNC_get_current_uid_gid:
7178                return &bpf_get_current_uid_gid_proto;
7179        case BPF_FUNC_bind:
7180                switch (prog->expected_attach_type) {
7181                case BPF_CGROUP_INET4_CONNECT:
7182                case BPF_CGROUP_INET6_CONNECT:
7183                        return &bpf_bind_proto;
7184                default:
7185                        return NULL;
7186                }
7187        case BPF_FUNC_get_socket_cookie:
7188                return &bpf_get_socket_cookie_sock_addr_proto;
7189        case BPF_FUNC_get_netns_cookie:
7190                return &bpf_get_netns_cookie_sock_addr_proto;
7191        case BPF_FUNC_get_local_storage:
7192                return &bpf_get_local_storage_proto;
7193        case BPF_FUNC_perf_event_output:
7194                return &bpf_event_output_data_proto;
7195        case BPF_FUNC_get_current_pid_tgid:
7196                return &bpf_get_current_pid_tgid_proto;
7197        case BPF_FUNC_get_current_comm:
7198                return &bpf_get_current_comm_proto;
7199#ifdef CONFIG_CGROUPS
7200        case BPF_FUNC_get_current_cgroup_id:
7201                return &bpf_get_current_cgroup_id_proto;
7202        case BPF_FUNC_get_current_ancestor_cgroup_id:
7203                return &bpf_get_current_ancestor_cgroup_id_proto;
7204#endif
7205#ifdef CONFIG_CGROUP_NET_CLASSID
7206        case BPF_FUNC_get_cgroup_classid:
7207                return &bpf_get_cgroup_classid_curr_proto;
7208#endif
7209#ifdef CONFIG_INET
7210        case BPF_FUNC_sk_lookup_tcp:
7211                return &bpf_sock_addr_sk_lookup_tcp_proto;
7212        case BPF_FUNC_sk_lookup_udp:
7213                return &bpf_sock_addr_sk_lookup_udp_proto;
7214        case BPF_FUNC_sk_release:
7215                return &bpf_sk_release_proto;
7216        case BPF_FUNC_skc_lookup_tcp:
7217                return &bpf_sock_addr_skc_lookup_tcp_proto;
7218#endif /* CONFIG_INET */
7219        case BPF_FUNC_sk_storage_get:
7220                return &bpf_sk_storage_get_proto;
7221        case BPF_FUNC_sk_storage_delete:
7222                return &bpf_sk_storage_delete_proto;
7223        case BPF_FUNC_setsockopt:
7224                switch (prog->expected_attach_type) {
7225                case BPF_CGROUP_INET4_BIND:
7226                case BPF_CGROUP_INET6_BIND:
7227                case BPF_CGROUP_INET4_CONNECT:
7228                case BPF_CGROUP_INET6_CONNECT:
7229                case BPF_CGROUP_UDP4_RECVMSG:
7230                case BPF_CGROUP_UDP6_RECVMSG:
7231                case BPF_CGROUP_UDP4_SENDMSG:
7232                case BPF_CGROUP_UDP6_SENDMSG:
7233                case BPF_CGROUP_INET4_GETPEERNAME:
7234                case BPF_CGROUP_INET6_GETPEERNAME:
7235                case BPF_CGROUP_INET4_GETSOCKNAME:
7236                case BPF_CGROUP_INET6_GETSOCKNAME:
7237                        return &bpf_sock_addr_setsockopt_proto;
7238                default:
7239                        return NULL;
7240                }
7241        case BPF_FUNC_getsockopt:
7242                switch (prog->expected_attach_type) {
7243                case BPF_CGROUP_INET4_BIND:
7244                case BPF_CGROUP_INET6_BIND:
7245                case BPF_CGROUP_INET4_CONNECT:
7246                case BPF_CGROUP_INET6_CONNECT:
7247                case BPF_CGROUP_UDP4_RECVMSG:
7248                case BPF_CGROUP_UDP6_RECVMSG:
7249                case BPF_CGROUP_UDP4_SENDMSG:
7250                case BPF_CGROUP_UDP6_SENDMSG:
7251                case BPF_CGROUP_INET4_GETPEERNAME:
7252                case BPF_CGROUP_INET6_GETPEERNAME:
7253                case BPF_CGROUP_INET4_GETSOCKNAME:
7254                case BPF_CGROUP_INET6_GETSOCKNAME:
7255                        return &bpf_sock_addr_getsockopt_proto;
7256                default:
7257                        return NULL;
7258                }
7259        default:
7260                return bpf_sk_base_func_proto(func_id);
7261        }
7262}
7263
7264static const struct bpf_func_proto *
7265sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7266{
7267        switch (func_id) {
7268        case BPF_FUNC_skb_load_bytes:
7269                return &bpf_skb_load_bytes_proto;
7270        case BPF_FUNC_skb_load_bytes_relative:
7271                return &bpf_skb_load_bytes_relative_proto;
7272        case BPF_FUNC_get_socket_cookie:
7273                return &bpf_get_socket_cookie_proto;
7274        case BPF_FUNC_get_socket_uid:
7275                return &bpf_get_socket_uid_proto;
7276        case BPF_FUNC_perf_event_output:
7277                return &bpf_skb_event_output_proto;
7278        default:
7279                return bpf_sk_base_func_proto(func_id);
7280        }
7281}
7282
7283const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
7284const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
7285
7286static const struct bpf_func_proto *
7287cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7288{
7289        switch (func_id) {
7290        case BPF_FUNC_get_local_storage:
7291                return &bpf_get_local_storage_proto;
7292        case BPF_FUNC_sk_fullsock:
7293                return &bpf_sk_fullsock_proto;
7294        case BPF_FUNC_sk_storage_get:
7295                return &bpf_sk_storage_get_proto;
7296        case BPF_FUNC_sk_storage_delete:
7297                return &bpf_sk_storage_delete_proto;
7298        case BPF_FUNC_perf_event_output:
7299                return &bpf_skb_event_output_proto;
7300#ifdef CONFIG_SOCK_CGROUP_DATA
7301        case BPF_FUNC_skb_cgroup_id:
7302                return &bpf_skb_cgroup_id_proto;
7303        case BPF_FUNC_skb_ancestor_cgroup_id:
7304                return &bpf_skb_ancestor_cgroup_id_proto;
7305        case BPF_FUNC_sk_cgroup_id:
7306                return &bpf_sk_cgroup_id_proto;
7307        case BPF_FUNC_sk_ancestor_cgroup_id:
7308                return &bpf_sk_ancestor_cgroup_id_proto;
7309#endif
7310#ifdef CONFIG_INET
7311        case BPF_FUNC_sk_lookup_tcp:
7312                return &bpf_sk_lookup_tcp_proto;
7313        case BPF_FUNC_sk_lookup_udp:
7314                return &bpf_sk_lookup_udp_proto;
7315        case BPF_FUNC_sk_release:
7316                return &bpf_sk_release_proto;
7317        case BPF_FUNC_skc_lookup_tcp:
7318                return &bpf_skc_lookup_tcp_proto;
7319        case BPF_FUNC_tcp_sock:
7320                return &bpf_tcp_sock_proto;
7321        case BPF_FUNC_get_listener_sock:
7322                return &bpf_get_listener_sock_proto;
7323        case BPF_FUNC_skb_ecn_set_ce:
7324                return &bpf_skb_ecn_set_ce_proto;
7325#endif
7326        default:
7327                return sk_filter_func_proto(func_id, prog);
7328        }
7329}
7330
7331static const struct bpf_func_proto *
7332tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7333{
7334        switch (func_id) {
7335        case BPF_FUNC_skb_store_bytes:
7336                return &bpf_skb_store_bytes_proto;
7337        case BPF_FUNC_skb_load_bytes:
7338                return &bpf_skb_load_bytes_proto;
7339        case BPF_FUNC_skb_load_bytes_relative:
7340                return &bpf_skb_load_bytes_relative_proto;
7341        case BPF_FUNC_skb_pull_data:
7342                return &bpf_skb_pull_data_proto;
7343        case BPF_FUNC_csum_diff:
7344                return &bpf_csum_diff_proto;
7345        case BPF_FUNC_csum_update:
7346                return &bpf_csum_update_proto;
7347        case BPF_FUNC_csum_level:
7348                return &bpf_csum_level_proto;
7349        case BPF_FUNC_l3_csum_replace:
7350                return &bpf_l3_csum_replace_proto;
7351        case BPF_FUNC_l4_csum_replace:
7352                return &bpf_l4_csum_replace_proto;
7353        case BPF_FUNC_clone_redirect:
7354                return &bpf_clone_redirect_proto;
7355        case BPF_FUNC_get_cgroup_classid:
7356                return &bpf_get_cgroup_classid_proto;
7357        case BPF_FUNC_skb_vlan_push:
7358                return &bpf_skb_vlan_push_proto;
7359        case BPF_FUNC_skb_vlan_pop:
7360                return &bpf_skb_vlan_pop_proto;
7361        case BPF_FUNC_skb_change_proto:
7362                return &bpf_skb_change_proto_proto;
7363        case BPF_FUNC_skb_change_type:
7364                return &bpf_skb_change_type_proto;
7365        case BPF_FUNC_skb_adjust_room:
7366                return &bpf_skb_adjust_room_proto;
7367        case BPF_FUNC_skb_change_tail:
7368                return &bpf_skb_change_tail_proto;
7369        case BPF_FUNC_skb_change_head:
7370                return &bpf_skb_change_head_proto;
7371        case BPF_FUNC_skb_get_tunnel_key:
7372                return &bpf_skb_get_tunnel_key_proto;
7373        case BPF_FUNC_skb_set_tunnel_key:
7374                return bpf_get_skb_set_tunnel_proto(func_id);
7375        case BPF_FUNC_skb_get_tunnel_opt:
7376                return &bpf_skb_get_tunnel_opt_proto;
7377        case BPF_FUNC_skb_set_tunnel_opt:
7378                return bpf_get_skb_set_tunnel_proto(func_id);
7379        case BPF_FUNC_redirect:
7380                return &bpf_redirect_proto;
7381        case BPF_FUNC_redirect_neigh:
7382                return &bpf_redirect_neigh_proto;
7383        case BPF_FUNC_redirect_peer:
7384                return &bpf_redirect_peer_proto;
7385        case BPF_FUNC_get_route_realm:
7386                return &bpf_get_route_realm_proto;
7387        case BPF_FUNC_get_hash_recalc:
7388                return &bpf_get_hash_recalc_proto;
7389        case BPF_FUNC_set_hash_invalid:
7390                return &bpf_set_hash_invalid_proto;
7391        case BPF_FUNC_set_hash:
7392                return &bpf_set_hash_proto;
7393        case BPF_FUNC_perf_event_output:
7394                return &bpf_skb_event_output_proto;
7395        case BPF_FUNC_get_smp_processor_id:
7396                return &bpf_get_smp_processor_id_proto;
7397        case BPF_FUNC_skb_under_cgroup:
7398                return &bpf_skb_under_cgroup_proto;
7399        case BPF_FUNC_get_socket_cookie:
7400                return &bpf_get_socket_cookie_proto;
7401        case BPF_FUNC_get_socket_uid:
7402                return &bpf_get_socket_uid_proto;
7403        case BPF_FUNC_fib_lookup:
7404                return &bpf_skb_fib_lookup_proto;
7405        case BPF_FUNC_check_mtu:
7406                return &bpf_skb_check_mtu_proto;
7407        case BPF_FUNC_sk_fullsock:
7408                return &bpf_sk_fullsock_proto;
7409        case BPF_FUNC_sk_storage_get:
7410                return &bpf_sk_storage_get_proto;
7411        case BPF_FUNC_sk_storage_delete:
7412                return &bpf_sk_storage_delete_proto;
7413#ifdef CONFIG_XFRM
7414        case BPF_FUNC_skb_get_xfrm_state:
7415                return &bpf_skb_get_xfrm_state_proto;
7416#endif
7417#ifdef CONFIG_CGROUP_NET_CLASSID
7418        case BPF_FUNC_skb_cgroup_classid:
7419                return &bpf_skb_cgroup_classid_proto;
7420#endif
7421#ifdef CONFIG_SOCK_CGROUP_DATA
7422        case BPF_FUNC_skb_cgroup_id:
7423                return &bpf_skb_cgroup_id_proto;
7424        case BPF_FUNC_skb_ancestor_cgroup_id:
7425                return &bpf_skb_ancestor_cgroup_id_proto;
7426#endif
7427#ifdef CONFIG_INET
7428        case BPF_FUNC_sk_lookup_tcp:
7429                return &bpf_sk_lookup_tcp_proto;
7430        case BPF_FUNC_sk_lookup_udp:
7431                return &bpf_sk_lookup_udp_proto;
7432        case BPF_FUNC_sk_release:
7433                return &bpf_sk_release_proto;
7434        case BPF_FUNC_tcp_sock:
7435                return &bpf_tcp_sock_proto;
7436        case BPF_FUNC_get_listener_sock:
7437                return &bpf_get_listener_sock_proto;
7438        case BPF_FUNC_skc_lookup_tcp:
7439                return &bpf_skc_lookup_tcp_proto;
7440        case BPF_FUNC_tcp_check_syncookie:
7441                return &bpf_tcp_check_syncookie_proto;
7442        case BPF_FUNC_skb_ecn_set_ce:
7443                return &bpf_skb_ecn_set_ce_proto;
7444        case BPF_FUNC_tcp_gen_syncookie:
7445                return &bpf_tcp_gen_syncookie_proto;
7446        case BPF_FUNC_sk_assign:
7447                return &bpf_sk_assign_proto;
7448#endif
7449        default:
7450                return bpf_sk_base_func_proto(func_id);
7451        }
7452}
7453
7454static const struct bpf_func_proto *
7455xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7456{
7457        switch (func_id) {
7458        case BPF_FUNC_perf_event_output:
7459                return &bpf_xdp_event_output_proto;
7460        case BPF_FUNC_get_smp_processor_id:
7461                return &bpf_get_smp_processor_id_proto;
7462        case BPF_FUNC_csum_diff:
7463                return &bpf_csum_diff_proto;
7464        case BPF_FUNC_xdp_adjust_head:
7465                return &bpf_xdp_adjust_head_proto;
7466        case BPF_FUNC_xdp_adjust_meta:
7467                return &bpf_xdp_adjust_meta_proto;
7468        case BPF_FUNC_redirect:
7469                return &bpf_xdp_redirect_proto;
7470        case BPF_FUNC_redirect_map:
7471                return &bpf_xdp_redirect_map_proto;
7472        case BPF_FUNC_xdp_adjust_tail:
7473                return &bpf_xdp_adjust_tail_proto;
7474        case BPF_FUNC_fib_lookup:
7475                return &bpf_xdp_fib_lookup_proto;
7476        case BPF_FUNC_check_mtu:
7477                return &bpf_xdp_check_mtu_proto;
7478#ifdef CONFIG_INET
7479        case BPF_FUNC_sk_lookup_udp:
7480                return &bpf_xdp_sk_lookup_udp_proto;
7481        case BPF_FUNC_sk_lookup_tcp:
7482                return &bpf_xdp_sk_lookup_tcp_proto;
7483        case BPF_FUNC_sk_release:
7484                return &bpf_sk_release_proto;
7485        case BPF_FUNC_skc_lookup_tcp:
7486                return &bpf_xdp_skc_lookup_tcp_proto;
7487        case BPF_FUNC_tcp_check_syncookie:
7488                return &bpf_tcp_check_syncookie_proto;
7489        case BPF_FUNC_tcp_gen_syncookie:
7490                return &bpf_tcp_gen_syncookie_proto;
7491#endif
7492        default:
7493                return bpf_sk_base_func_proto(func_id);
7494        }
7495}
7496
7497const struct bpf_func_proto bpf_sock_map_update_proto __weak;
7498const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
7499
7500static const struct bpf_func_proto *
7501sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7502{
7503        switch (func_id) {
7504        case BPF_FUNC_setsockopt:
7505                return &bpf_sock_ops_setsockopt_proto;
7506        case BPF_FUNC_getsockopt:
7507                return &bpf_sock_ops_getsockopt_proto;
7508        case BPF_FUNC_sock_ops_cb_flags_set:
7509                return &bpf_sock_ops_cb_flags_set_proto;
7510        case BPF_FUNC_sock_map_update:
7511                return &bpf_sock_map_update_proto;
7512        case BPF_FUNC_sock_hash_update:
7513                return &bpf_sock_hash_update_proto;
7514        case BPF_FUNC_get_socket_cookie:
7515                return &bpf_get_socket_cookie_sock_ops_proto;
7516        case BPF_FUNC_get_local_storage:
7517                return &bpf_get_local_storage_proto;
7518        case BPF_FUNC_perf_event_output:
7519                return &bpf_event_output_data_proto;
7520        case BPF_FUNC_sk_storage_get:
7521                return &bpf_sk_storage_get_proto;
7522        case BPF_FUNC_sk_storage_delete:
7523                return &bpf_sk_storage_delete_proto;
7524        case BPF_FUNC_get_netns_cookie:
7525                return &bpf_get_netns_cookie_sock_ops_proto;
7526#ifdef CONFIG_INET
7527        case BPF_FUNC_load_hdr_opt:
7528                return &bpf_sock_ops_load_hdr_opt_proto;
7529        case BPF_FUNC_store_hdr_opt:
7530                return &bpf_sock_ops_store_hdr_opt_proto;
7531        case BPF_FUNC_reserve_hdr_opt:
7532                return &bpf_sock_ops_reserve_hdr_opt_proto;
7533        case BPF_FUNC_tcp_sock:
7534                return &bpf_tcp_sock_proto;
7535#endif /* CONFIG_INET */
7536        default:
7537                return bpf_sk_base_func_proto(func_id);
7538        }
7539}
7540
7541const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
7542const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
7543
7544static const struct bpf_func_proto *
7545sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7546{
7547        switch (func_id) {
7548        case BPF_FUNC_msg_redirect_map:
7549                return &bpf_msg_redirect_map_proto;
7550        case BPF_FUNC_msg_redirect_hash:
7551                return &bpf_msg_redirect_hash_proto;
7552        case BPF_FUNC_msg_apply_bytes:
7553                return &bpf_msg_apply_bytes_proto;
7554        case BPF_FUNC_msg_cork_bytes:
7555                return &bpf_msg_cork_bytes_proto;
7556        case BPF_FUNC_msg_pull_data:
7557                return &bpf_msg_pull_data_proto;
7558        case BPF_FUNC_msg_push_data:
7559                return &bpf_msg_push_data_proto;
7560        case BPF_FUNC_msg_pop_data:
7561                return &bpf_msg_pop_data_proto;
7562        case BPF_FUNC_perf_event_output:
7563                return &bpf_event_output_data_proto;
7564        case BPF_FUNC_get_current_uid_gid:
7565                return &bpf_get_current_uid_gid_proto;
7566        case BPF_FUNC_get_current_pid_tgid:
7567                return &bpf_get_current_pid_tgid_proto;
7568        case BPF_FUNC_sk_storage_get:
7569                return &bpf_sk_storage_get_proto;
7570        case BPF_FUNC_sk_storage_delete:
7571                return &bpf_sk_storage_delete_proto;
7572        case BPF_FUNC_get_netns_cookie:
7573                return &bpf_get_netns_cookie_sk_msg_proto;
7574#ifdef CONFIG_CGROUPS
7575        case BPF_FUNC_get_current_cgroup_id:
7576                return &bpf_get_current_cgroup_id_proto;
7577        case BPF_FUNC_get_current_ancestor_cgroup_id:
7578                return &bpf_get_current_ancestor_cgroup_id_proto;
7579#endif
7580#ifdef CONFIG_CGROUP_NET_CLASSID
7581        case BPF_FUNC_get_cgroup_classid:
7582                return &bpf_get_cgroup_classid_curr_proto;
7583#endif
7584        default:
7585                return bpf_sk_base_func_proto(func_id);
7586        }
7587}
7588
7589const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
7590const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
7591
7592static const struct bpf_func_proto *
7593sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7594{
7595        switch (func_id) {
7596        case BPF_FUNC_skb_store_bytes:
7597                return &bpf_skb_store_bytes_proto;
7598        case BPF_FUNC_skb_load_bytes:
7599                return &bpf_skb_load_bytes_proto;
7600        case BPF_FUNC_skb_pull_data:
7601                return &sk_skb_pull_data_proto;
7602        case BPF_FUNC_skb_change_tail:
7603                return &sk_skb_change_tail_proto;
7604        case BPF_FUNC_skb_change_head:
7605                return &sk_skb_change_head_proto;
7606        case BPF_FUNC_skb_adjust_room:
7607                return &sk_skb_adjust_room_proto;
7608        case BPF_FUNC_get_socket_cookie:
7609                return &bpf_get_socket_cookie_proto;
7610        case BPF_FUNC_get_socket_uid:
7611                return &bpf_get_socket_uid_proto;
7612        case BPF_FUNC_sk_redirect_map:
7613                return &bpf_sk_redirect_map_proto;
7614        case BPF_FUNC_sk_redirect_hash:
7615                return &bpf_sk_redirect_hash_proto;
7616        case BPF_FUNC_perf_event_output:
7617                return &bpf_skb_event_output_proto;
7618#ifdef CONFIG_INET
7619        case BPF_FUNC_sk_lookup_tcp:
7620                return &bpf_sk_lookup_tcp_proto;
7621        case BPF_FUNC_sk_lookup_udp:
7622                return &bpf_sk_lookup_udp_proto;
7623        case BPF_FUNC_sk_release:
7624                return &bpf_sk_release_proto;
7625        case BPF_FUNC_skc_lookup_tcp:
7626                return &bpf_skc_lookup_tcp_proto;
7627#endif
7628        default:
7629                return bpf_sk_base_func_proto(func_id);
7630        }
7631}
7632
7633static const struct bpf_func_proto *
7634flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7635{
7636        switch (func_id) {
7637        case BPF_FUNC_skb_load_bytes:
7638                return &bpf_flow_dissector_load_bytes_proto;
7639        default:
7640                return bpf_sk_base_func_proto(func_id);
7641        }
7642}
7643
7644static const struct bpf_func_proto *
7645lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7646{
7647        switch (func_id) {
7648        case BPF_FUNC_skb_load_bytes:
7649                return &bpf_skb_load_bytes_proto;
7650        case BPF_FUNC_skb_pull_data:
7651                return &bpf_skb_pull_data_proto;
7652        case BPF_FUNC_csum_diff:
7653                return &bpf_csum_diff_proto;
7654        case BPF_FUNC_get_cgroup_classid:
7655                return &bpf_get_cgroup_classid_proto;
7656        case BPF_FUNC_get_route_realm:
7657                return &bpf_get_route_realm_proto;
7658        case BPF_FUNC_get_hash_recalc:
7659                return &bpf_get_hash_recalc_proto;
7660        case BPF_FUNC_perf_event_output:
7661                return &bpf_skb_event_output_proto;
7662        case BPF_FUNC_get_smp_processor_id:
7663                return &bpf_get_smp_processor_id_proto;
7664        case BPF_FUNC_skb_under_cgroup:
7665                return &bpf_skb_under_cgroup_proto;
7666        default:
7667                return bpf_sk_base_func_proto(func_id);
7668        }
7669}
7670
7671static const struct bpf_func_proto *
7672lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7673{
7674        switch (func_id) {
7675        case BPF_FUNC_lwt_push_encap:
7676                return &bpf_lwt_in_push_encap_proto;
7677        default:
7678                return lwt_out_func_proto(func_id, prog);
7679        }
7680}
7681
7682static const struct bpf_func_proto *
7683lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7684{
7685        switch (func_id) {
7686        case BPF_FUNC_skb_get_tunnel_key:
7687                return &bpf_skb_get_tunnel_key_proto;
7688        case BPF_FUNC_skb_set_tunnel_key:
7689                return bpf_get_skb_set_tunnel_proto(func_id);
7690        case BPF_FUNC_skb_get_tunnel_opt:
7691                return &bpf_skb_get_tunnel_opt_proto;
7692        case BPF_FUNC_skb_set_tunnel_opt:
7693                return bpf_get_skb_set_tunnel_proto(func_id);
7694        case BPF_FUNC_redirect:
7695                return &bpf_redirect_proto;
7696        case BPF_FUNC_clone_redirect:
7697                return &bpf_clone_redirect_proto;
7698        case BPF_FUNC_skb_change_tail:
7699                return &bpf_skb_change_tail_proto;
7700        case BPF_FUNC_skb_change_head:
7701                return &bpf_skb_change_head_proto;
7702        case BPF_FUNC_skb_store_bytes:
7703                return &bpf_skb_store_bytes_proto;
7704        case BPF_FUNC_csum_update:
7705                return &bpf_csum_update_proto;
7706        case BPF_FUNC_csum_level:
7707                return &bpf_csum_level_proto;
7708        case BPF_FUNC_l3_csum_replace:
7709                return &bpf_l3_csum_replace_proto;
7710        case BPF_FUNC_l4_csum_replace:
7711                return &bpf_l4_csum_replace_proto;
7712        case BPF_FUNC_set_hash_invalid:
7713                return &bpf_set_hash_invalid_proto;
7714        case BPF_FUNC_lwt_push_encap:
7715                return &bpf_lwt_xmit_push_encap_proto;
7716        default:
7717                return lwt_out_func_proto(func_id, prog);
7718        }
7719}
7720
7721static const struct bpf_func_proto *
7722lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7723{
7724        switch (func_id) {
7725#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
7726        case BPF_FUNC_lwt_seg6_store_bytes:
7727                return &bpf_lwt_seg6_store_bytes_proto;
7728        case BPF_FUNC_lwt_seg6_action:
7729                return &bpf_lwt_seg6_action_proto;
7730        case BPF_FUNC_lwt_seg6_adjust_srh:
7731                return &bpf_lwt_seg6_adjust_srh_proto;
7732#endif
7733        default:
7734                return lwt_out_func_proto(func_id, prog);
7735        }
7736}
7737
7738static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
7739                                    const struct bpf_prog *prog,
7740                                    struct bpf_insn_access_aux *info)
7741{
7742        const int size_default = sizeof(__u32);
7743
7744        if (off < 0 || off >= sizeof(struct __sk_buff))
7745                return false;
7746
7747        /* The verifier guarantees that size > 0. */
7748        if (off % size != 0)
7749                return false;
7750
7751        switch (off) {
7752        case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7753                if (off + size > offsetofend(struct __sk_buff, cb[4]))
7754                        return false;
7755                break;
7756        case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
7757        case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
7758        case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
7759        case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
7760        case bpf_ctx_range(struct __sk_buff, data):
7761        case bpf_ctx_range(struct __sk_buff, data_meta):
7762        case bpf_ctx_range(struct __sk_buff, data_end):
7763                if (size != size_default)
7764                        return false;
7765                break;
7766        case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
7767                return false;
7768        case bpf_ctx_range(struct __sk_buff, tstamp):
7769                if (size != sizeof(__u64))
7770                        return false;
7771                break;
7772        case offsetof(struct __sk_buff, sk):
7773                if (type == BPF_WRITE || size != sizeof(__u64))
7774                        return false;
7775                info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
7776                break;
7777        default:
7778                /* Only narrow read access allowed for now. */
7779                if (type == BPF_WRITE) {
7780                        if (size != size_default)
7781                                return false;
7782                } else {
7783                        bpf_ctx_record_field_size(info, size_default);
7784                        if (!bpf_ctx_narrow_access_ok(off, size, size_default))
7785                                return false;
7786                }
7787        }
7788
7789        return true;
7790}
7791
7792static bool sk_filter_is_valid_access(int off, int size,
7793                                      enum bpf_access_type type,
7794                                      const struct bpf_prog *prog,
7795                                      struct bpf_insn_access_aux *info)
7796{
7797        switch (off) {
7798        case bpf_ctx_range(struct __sk_buff, tc_classid):
7799        case bpf_ctx_range(struct __sk_buff, data):
7800        case bpf_ctx_range(struct __sk_buff, data_meta):
7801        case bpf_ctx_range(struct __sk_buff, data_end):
7802        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
7803        case bpf_ctx_range(struct __sk_buff, tstamp):
7804        case bpf_ctx_range(struct __sk_buff, wire_len):
7805                return false;
7806        }
7807
7808        if (type == BPF_WRITE) {
7809                switch (off) {
7810                case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7811                        break;
7812                default:
7813                        return false;
7814                }
7815        }
7816
7817        return bpf_skb_is_valid_access(off, size, type, prog, info);
7818}
7819
7820static bool cg_skb_is_valid_access(int off, int size,
7821                                   enum bpf_access_type type,
7822                                   const struct bpf_prog *prog,
7823                                   struct bpf_insn_access_aux *info)
7824{
7825        switch (off) {
7826        case bpf_ctx_range(struct __sk_buff, tc_classid):
7827        case bpf_ctx_range(struct __sk_buff, data_meta):
7828        case bpf_ctx_range(struct __sk_buff, wire_len):
7829                return false;
7830        case bpf_ctx_range(struct __sk_buff, data):
7831        case bpf_ctx_range(struct __sk_buff, data_end):
7832                if (!bpf_capable())
7833                        return false;
7834                break;
7835        }
7836
7837        if (type == BPF_WRITE) {
7838                switch (off) {
7839                case bpf_ctx_range(struct __sk_buff, mark):
7840                case bpf_ctx_range(struct __sk_buff, priority):
7841                case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7842                        break;
7843                case bpf_ctx_range(struct __sk_buff, tstamp):
7844                        if (!bpf_capable())
7845                                return false;
7846                        break;
7847                default:
7848                        return false;
7849                }
7850        }
7851
7852        switch (off) {
7853        case bpf_ctx_range(struct __sk_buff, data):
7854                info->reg_type = PTR_TO_PACKET;
7855                break;
7856        case bpf_ctx_range(struct __sk_buff, data_end):
7857                info->reg_type = PTR_TO_PACKET_END;
7858                break;
7859        }
7860
7861        return bpf_skb_is_valid_access(off, size, type, prog, info);
7862}
7863
7864static bool lwt_is_valid_access(int off, int size,
7865                                enum bpf_access_type type,
7866                                const struct bpf_prog *prog,
7867                                struct bpf_insn_access_aux *info)
7868{
7869        switch (off) {
7870        case bpf_ctx_range(struct __sk_buff, tc_classid):
7871        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
7872        case bpf_ctx_range(struct __sk_buff, data_meta):
7873        case bpf_ctx_range(struct __sk_buff, tstamp):
7874        case bpf_ctx_range(struct __sk_buff, wire_len):
7875                return false;
7876        }
7877
7878        if (type == BPF_WRITE) {
7879                switch (off) {
7880                case bpf_ctx_range(struct __sk_buff, mark):
7881                case bpf_ctx_range(struct __sk_buff, priority):
7882                case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7883                        break;
7884                default:
7885                        return false;
7886                }
7887        }
7888
7889        switch (off) {
7890        case bpf_ctx_range(struct __sk_buff, data):
7891                info->reg_type = PTR_TO_PACKET;
7892                break;
7893        case bpf_ctx_range(struct __sk_buff, data_end):
7894                info->reg_type = PTR_TO_PACKET_END;
7895                break;
7896        }
7897
7898        return bpf_skb_is_valid_access(off, size, type, prog, info);
7899}
7900
7901/* Attach type specific accesses */
7902static bool __sock_filter_check_attach_type(int off,
7903                                            enum bpf_access_type access_type,
7904                                            enum bpf_attach_type attach_type)
7905{
7906        switch (off) {
7907        case offsetof(struct bpf_sock, bound_dev_if):
7908        case offsetof(struct bpf_sock, mark):
7909        case offsetof(struct bpf_sock, priority):
7910                switch (attach_type) {
7911                case BPF_CGROUP_INET_SOCK_CREATE:
7912                case BPF_CGROUP_INET_SOCK_RELEASE:
7913                        goto full_access;
7914                default:
7915                        return false;
7916                }
7917        case bpf_ctx_range(struct bpf_sock, src_ip4):
7918                switch (attach_type) {
7919                case BPF_CGROUP_INET4_POST_BIND:
7920                        goto read_only;
7921                default:
7922                        return false;
7923                }
7924        case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
7925                switch (attach_type) {
7926                case BPF_CGROUP_INET6_POST_BIND:
7927                        goto read_only;
7928                default:
7929                        return false;
7930                }
7931        case bpf_ctx_range(struct bpf_sock, src_port):
7932                switch (attach_type) {
7933                case BPF_CGROUP_INET4_POST_BIND:
7934                case BPF_CGROUP_INET6_POST_BIND:
7935                        goto read_only;
7936                default:
7937                        return false;
7938                }
7939        }
7940read_only:
7941        return access_type == BPF_READ;
7942full_access:
7943        return true;
7944}
7945
7946bool bpf_sock_common_is_valid_access(int off, int size,
7947                                     enum bpf_access_type type,
7948                                     struct bpf_insn_access_aux *info)
7949{
7950        switch (off) {
7951        case bpf_ctx_range_till(struct bpf_sock, type, priority):
7952                return false;
7953        default:
7954                return bpf_sock_is_valid_access(off, size, type, info);
7955        }
7956}
7957
7958bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
7959                              struct bpf_insn_access_aux *info)
7960{
7961        const int size_default = sizeof(__u32);
7962
7963        if (off < 0 || off >= sizeof(struct bpf_sock))
7964                return false;
7965        if (off % size != 0)
7966                return false;
7967
7968        switch (off) {
7969        case offsetof(struct bpf_sock, state):
7970        case offsetof(struct bpf_sock, family):
7971        case offsetof(struct bpf_sock, type):
7972        case offsetof(struct bpf_sock, protocol):
7973        case offsetof(struct bpf_sock, dst_port):
7974        case offsetof(struct bpf_sock, src_port):
7975        case offsetof(struct bpf_sock, rx_queue_mapping):
7976        case bpf_ctx_range(struct bpf_sock, src_ip4):
7977        case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
7978        case bpf_ctx_range(struct bpf_sock, dst_ip4):
7979        case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
7980                bpf_ctx_record_field_size(info, size_default);
7981                return bpf_ctx_narrow_access_ok(off, size, size_default);
7982        }
7983
7984        return size == size_default;
7985}
7986
7987static bool sock_filter_is_valid_access(int off, int size,
7988                                        enum bpf_access_type type,
7989                                        const struct bpf_prog *prog,
7990                                        struct bpf_insn_access_aux *info)
7991{
7992        if (!bpf_sock_is_valid_access(off, size, type, info))
7993                return false;
7994        return __sock_filter_check_attach_type(off, type,
7995                                               prog->expected_attach_type);
7996}
7997
7998static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
7999                             const struct bpf_prog *prog)
8000{
8001        /* Neither direct read nor direct write requires any preliminary
8002         * action.
8003         */
8004        return 0;
8005}
8006
8007static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
8008                                const struct bpf_prog *prog, int drop_verdict)
8009{
8010        struct bpf_insn *insn = insn_buf;
8011
8012        if (!direct_write)
8013                return 0;
8014
8015        /* if (!skb->cloned)
8016         *       goto start;
8017         *
8018         * (Fast-path, otherwise approximation that we might be
8019         *  a clone, do the rest in helper.)
8020         */
8021        *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
8022        *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
8023        *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
8024
8025        /* ret = bpf_skb_pull_data(skb, 0); */
8026        *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
8027        *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
8028        *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8029                               BPF_FUNC_skb_pull_data);
8030        /* if (!ret)
8031         *      goto restore;
8032         * return TC_ACT_SHOT;
8033         */
8034        *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
8035        *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
8036        *insn++ = BPF_EXIT_INSN();
8037
8038        /* restore: */
8039        *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
8040        /* start: */
8041        *insn++ = prog->insnsi[0];
8042
8043        return insn - insn_buf;
8044}
8045
8046static int bpf_gen_ld_abs(const struct bpf_insn *orig,
8047                          struct bpf_insn *insn_buf)
8048{
8049        bool indirect = BPF_MODE(orig->code) == BPF_IND;
8050        struct bpf_insn *insn = insn_buf;
8051
8052        if (!indirect) {
8053                *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
8054        } else {
8055                *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
8056                if (orig->imm)
8057                        *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
8058        }
8059        /* We're guaranteed here that CTX is in R6. */
8060        *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
8061
8062        switch (BPF_SIZE(orig->code)) {
8063        case BPF_B:
8064                *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
8065                break;
8066        case BPF_H:
8067                *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
8068                break;
8069        case BPF_W:
8070                *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
8071                break;
8072        }
8073
8074        *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
8075        *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
8076        *insn++ = BPF_EXIT_INSN();
8077
8078        return insn - insn_buf;
8079}
8080
8081static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
8082                               const struct bpf_prog *prog)
8083{
8084        return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
8085}
8086
8087static bool tc_cls_act_is_valid_access(int off, int size,
8088                                       enum bpf_access_type type,
8089                                       const struct bpf_prog *prog,
8090                                       struct bpf_insn_access_aux *info)
8091{
8092        if (type == BPF_WRITE) {
8093                switch (off) {
8094                case bpf_ctx_range(struct __sk_buff, mark):
8095                case bpf_ctx_range(struct __sk_buff, tc_index):
8096                case bpf_ctx_range(struct __sk_buff, priority):
8097                case bpf_ctx_range(struct __sk_buff, tc_classid):
8098                case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
8099                case bpf_ctx_range(struct __sk_buff, tstamp):
8100                case bpf_ctx_range(struct __sk_buff, queue_mapping):
8101                        break;
8102                default:
8103                        return false;
8104                }
8105        }
8106
8107        switch (off) {
8108        case bpf_ctx_range(struct __sk_buff, data):
8109                info->reg_type = PTR_TO_PACKET;
8110                break;
8111        case bpf_ctx_range(struct __sk_buff, data_meta):
8112                info->reg_type = PTR_TO_PACKET_META;
8113                break;
8114        case bpf_ctx_range(struct __sk_buff, data_end):
8115                info->reg_type = PTR_TO_PACKET_END;
8116                break;
8117        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8118                return false;
8119        }
8120
8121        return bpf_skb_is_valid_access(off, size, type, prog, info);
8122}
8123
8124static bool __is_valid_xdp_access(int off, int size)
8125{
8126        if (off < 0 || off >= sizeof(struct xdp_md))
8127                return false;
8128        if (off % size != 0)
8129                return false;
8130        if (size != sizeof(__u32))
8131                return false;
8132
8133        return true;
8134}
8135
8136static bool xdp_is_valid_access(int off, int size,
8137                                enum bpf_access_type type,
8138                                const struct bpf_prog *prog,
8139                                struct bpf_insn_access_aux *info)
8140{
8141        if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
8142                switch (off) {
8143                case offsetof(struct xdp_md, egress_ifindex):
8144                        return false;
8145                }
8146        }
8147
8148        if (type == BPF_WRITE) {
8149                if (bpf_prog_is_dev_bound(prog->aux)) {
8150                        switch (off) {
8151                        case offsetof(struct xdp_md, rx_queue_index):
8152                                return __is_valid_xdp_access(off, size);
8153                        }
8154                }
8155                return false;
8156        }
8157
8158        switch (off) {
8159        case offsetof(struct xdp_md, data):
8160                info->reg_type = PTR_TO_PACKET;
8161                break;
8162        case offsetof(struct xdp_md, data_meta):
8163                info->reg_type = PTR_TO_PACKET_META;
8164                break;
8165        case offsetof(struct xdp_md, data_end):
8166                info->reg_type = PTR_TO_PACKET_END;
8167                break;
8168        }
8169
8170        return __is_valid_xdp_access(off, size);
8171}
8172
8173void bpf_warn_invalid_xdp_action(u32 act)
8174{
8175        const u32 act_max = XDP_REDIRECT;
8176
8177        WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
8178                  act > act_max ? "Illegal" : "Driver unsupported",
8179                  act);
8180}
8181EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
8182
8183static bool sock_addr_is_valid_access(int off, int size,
8184                                      enum bpf_access_type type,
8185                                      const struct bpf_prog *prog,
8186                                      struct bpf_insn_access_aux *info)
8187{
8188        const int size_default = sizeof(__u32);
8189
8190        if (off < 0 || off >= sizeof(struct bpf_sock_addr))
8191                return false;
8192        if (off % size != 0)
8193                return false;
8194
8195        /* Disallow access to IPv6 fields from IPv4 contex and vise
8196         * versa.
8197         */
8198        switch (off) {
8199        case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8200                switch (prog->expected_attach_type) {
8201                case BPF_CGROUP_INET4_BIND:
8202                case BPF_CGROUP_INET4_CONNECT:
8203                case BPF_CGROUP_INET4_GETPEERNAME:
8204                case BPF_CGROUP_INET4_GETSOCKNAME:
8205                case BPF_CGROUP_UDP4_SENDMSG:
8206                case BPF_CGROUP_UDP4_RECVMSG:
8207                        break;
8208                default:
8209                        return false;
8210                }
8211                break;
8212        case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
8213                switch (prog->expected_attach_type) {
8214                case BPF_CGROUP_INET6_BIND:
8215                case BPF_CGROUP_INET6_CONNECT:
8216                case BPF_CGROUP_INET6_GETPEERNAME:
8217                case BPF_CGROUP_INET6_GETSOCKNAME:
8218                case BPF_CGROUP_UDP6_SENDMSG:
8219                case BPF_CGROUP_UDP6_RECVMSG:
8220                        break;
8221                default:
8222                        return false;
8223                }
8224                break;
8225        case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
8226                switch (prog->expected_attach_type) {
8227                case BPF_CGROUP_UDP4_SENDMSG:
8228                        break;
8229                default:
8230                        return false;
8231                }
8232                break;
8233        case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8234                                msg_src_ip6[3]):
8235                switch (prog->expected_attach_type) {
8236                case BPF_CGROUP_UDP6_SENDMSG:
8237                        break;
8238                default:
8239                        return false;
8240                }
8241                break;
8242        }
8243
8244        switch (off) {
8245        case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8246        case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
8247        case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
8248        case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8249                                msg_src_ip6[3]):
8250        case bpf_ctx_range(struct bpf_sock_addr, user_port):
8251                if (type == BPF_READ) {
8252                        bpf_ctx_record_field_size(info, size_default);
8253
8254                        if (bpf_ctx_wide_access_ok(off, size,
8255                                                   struct bpf_sock_addr,
8256                                                   user_ip6))
8257                                return true;
8258
8259                        if (bpf_ctx_wide_access_ok(off, size,
8260                                                   struct bpf_sock_addr,
8261                                                   msg_src_ip6))
8262                                return true;
8263
8264                        if (!bpf_ctx_narrow_access_ok(off, size, size_default))
8265                                return false;
8266                } else {
8267                        if (bpf_ctx_wide_access_ok(off, size,
8268                                                   struct bpf_sock_addr,
8269                                                   user_ip6))
8270                                return true;
8271
8272                        if (bpf_ctx_wide_access_ok(off, size,
8273                                                   struct bpf_sock_addr,
8274                                                   msg_src_ip6))
8275                                return true;
8276
8277                        if (size != size_default)
8278                                return false;
8279                }
8280                break;
8281        case offsetof(struct bpf_sock_addr, sk):
8282                if (type != BPF_READ)
8283                        return false;
8284                if (size != sizeof(__u64))
8285                        return false;
8286                info->reg_type = PTR_TO_SOCKET;
8287                break;
8288        default:
8289                if (type == BPF_READ) {
8290                        if (size != size_default)
8291                                return false;
8292                } else {
8293                        return false;
8294                }
8295        }
8296
8297        return true;
8298}
8299
8300static bool sock_ops_is_valid_access(int off, int size,
8301                                     enum bpf_access_type type,
8302                                     const struct bpf_prog *prog,
8303                                     struct bpf_insn_access_aux *info)
8304{
8305        const int size_default = sizeof(__u32);
8306
8307        if (off < 0 || off >= sizeof(struct bpf_sock_ops))
8308                return false;
8309
8310        /* The verifier guarantees that size > 0. */
8311        if (off % size != 0)
8312                return false;
8313
8314        if (type == BPF_WRITE) {
8315                switch (off) {
8316                case offsetof(struct bpf_sock_ops, reply):
8317                case offsetof(struct bpf_sock_ops, sk_txhash):
8318                        if (size != size_default)
8319                                return false;
8320                        break;
8321                default:
8322                        return false;
8323                }
8324        } else {
8325                switch (off) {
8326                case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
8327                                        bytes_acked):
8328                        if (size != sizeof(__u64))
8329                                return false;
8330                        break;
8331                case offsetof(struct bpf_sock_ops, sk):
8332                        if (size != sizeof(__u64))
8333                                return false;
8334                        info->reg_type = PTR_TO_SOCKET_OR_NULL;
8335                        break;
8336                case offsetof(struct bpf_sock_ops, skb_data):
8337                        if (size != sizeof(__u64))
8338                                return false;
8339                        info->reg_type = PTR_TO_PACKET;
8340                        break;
8341                case offsetof(struct bpf_sock_ops, skb_data_end):
8342                        if (size != sizeof(__u64))
8343                                return false;
8344                        info->reg_type = PTR_TO_PACKET_END;
8345                        break;
8346                case offsetof(struct bpf_sock_ops, skb_tcp_flags):
8347                        bpf_ctx_record_field_size(info, size_default);
8348                        return bpf_ctx_narrow_access_ok(off, size,
8349                                                        size_default);
8350                default:
8351                        if (size != size_default)
8352                                return false;
8353                        break;
8354                }
8355        }
8356
8357        return true;
8358}
8359
8360static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
8361                           const struct bpf_prog *prog)
8362{
8363        return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
8364}
8365
8366static bool sk_skb_is_valid_access(int off, int size,
8367                                   enum bpf_access_type type,
8368                                   const struct bpf_prog *prog,
8369                                   struct bpf_insn_access_aux *info)
8370{
8371        switch (off) {
8372        case bpf_ctx_range(struct __sk_buff, tc_classid):
8373        case bpf_ctx_range(struct __sk_buff, data_meta):
8374        case bpf_ctx_range(struct __sk_buff, tstamp):
8375        case bpf_ctx_range(struct __sk_buff, wire_len):
8376                return false;
8377        }
8378
8379        if (type == BPF_WRITE) {
8380                switch (off) {
8381                case bpf_ctx_range(struct __sk_buff, tc_index):
8382                case bpf_ctx_range(struct __sk_buff, priority):
8383                        break;
8384                default:
8385                        return false;
8386                }
8387        }
8388
8389        switch (off) {
8390        case bpf_ctx_range(struct __sk_buff, mark):
8391                return false;
8392        case bpf_ctx_range(struct __sk_buff, data):
8393                info->reg_type = PTR_TO_PACKET;
8394                break;
8395        case bpf_ctx_range(struct __sk_buff, data_end):
8396                info->reg_type = PTR_TO_PACKET_END;
8397                break;
8398        }
8399
8400        return bpf_skb_is_valid_access(off, size, type, prog, info);
8401}
8402
8403static bool sk_msg_is_valid_access(int off, int size,
8404                                   enum bpf_access_type type,
8405                                   const struct bpf_prog *prog,
8406                                   struct bpf_insn_access_aux *info)
8407{
8408        if (type == BPF_WRITE)
8409                return false;
8410
8411        if (off % size != 0)
8412                return false;
8413
8414        switch (off) {
8415        case offsetof(struct sk_msg_md, data):
8416                info->reg_type = PTR_TO_PACKET;
8417                if (size != sizeof(__u64))
8418                        return false;
8419                break;
8420        case offsetof(struct sk_msg_md, data_end):
8421                info->reg_type = PTR_TO_PACKET_END;
8422                if (size != sizeof(__u64))
8423                        return false;
8424                break;
8425        case offsetof(struct sk_msg_md, sk):
8426                if (size != sizeof(__u64))
8427                        return false;
8428                info->reg_type = PTR_TO_SOCKET;
8429                break;
8430        case bpf_ctx_range(struct sk_msg_md, family):
8431        case bpf_ctx_range(struct sk_msg_md, remote_ip4):
8432        case bpf_ctx_range(struct sk_msg_md, local_ip4):
8433        case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
8434        case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
8435        case bpf_ctx_range(struct sk_msg_md, remote_port):
8436        case bpf_ctx_range(struct sk_msg_md, local_port):
8437        case bpf_ctx_range(struct sk_msg_md, size):
8438                if (size != sizeof(__u32))
8439                        return false;
8440                break;
8441        default:
8442                return false;
8443        }
8444        return true;
8445}
8446
8447static bool flow_dissector_is_valid_access(int off, int size,
8448                                           enum bpf_access_type type,
8449                                           const struct bpf_prog *prog,
8450                                           struct bpf_insn_access_aux *info)
8451{
8452        const int size_default = sizeof(__u32);
8453
8454        if (off < 0 || off >= sizeof(struct __sk_buff))
8455                return false;
8456
8457        if (type == BPF_WRITE)
8458                return false;
8459
8460        switch (off) {
8461        case bpf_ctx_range(struct __sk_buff, data):
8462                if (size != size_default)
8463                        return false;
8464                info->reg_type = PTR_TO_PACKET;
8465                return true;
8466        case bpf_ctx_range(struct __sk_buff, data_end):
8467                if (size != size_default)
8468                        return false;
8469                info->reg_type = PTR_TO_PACKET_END;
8470                return true;
8471        case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
8472                if (size != sizeof(__u64))
8473                        return false;
8474                info->reg_type = PTR_TO_FLOW_KEYS;
8475                return true;
8476        default:
8477                return false;
8478        }
8479}
8480
8481static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
8482                                             const struct bpf_insn *si,
8483                                             struct bpf_insn *insn_buf,
8484                                             struct bpf_prog *prog,
8485                                             u32 *target_size)
8486
8487{
8488        struct bpf_insn *insn = insn_buf;
8489
8490        switch (si->off) {
8491        case offsetof(struct __sk_buff, data):
8492                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
8493                                      si->dst_reg, si->src_reg,
8494                                      offsetof(struct bpf_flow_dissector, data));
8495                break;
8496
8497        case offsetof(struct __sk_buff, data_end):
8498                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
8499                                      si->dst_reg, si->src_reg,
8500                                      offsetof(struct bpf_flow_dissector, data_end));
8501                break;
8502
8503        case offsetof(struct __sk_buff, flow_keys):
8504                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
8505                                      si->dst_reg, si->src_reg,
8506                                      offsetof(struct bpf_flow_dissector, flow_keys));
8507                break;
8508        }
8509
8510        return insn - insn_buf;
8511}
8512
8513static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
8514                                                  struct bpf_insn *insn)
8515{
8516        /* si->dst_reg = skb_shinfo(SKB); */
8517#ifdef NET_SKBUFF_DATA_USES_OFFSET
8518        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
8519                              BPF_REG_AX, si->src_reg,
8520                              offsetof(struct sk_buff, end));
8521        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
8522                              si->dst_reg, si->src_reg,
8523                              offsetof(struct sk_buff, head));
8524        *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
8525#else
8526        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
8527                              si->dst_reg, si->src_reg,
8528                              offsetof(struct sk_buff, end));
8529#endif
8530
8531        return insn;
8532}
8533
8534static u32 bpf_convert_ctx_access(enum bpf_access_type type,
8535                                  const struct bpf_insn *si,
8536                                  struct bpf_insn *insn_buf,
8537                                  struct bpf_prog *prog, u32 *target_size)
8538{
8539        struct bpf_insn *insn = insn_buf;
8540        int off;
8541
8542        switch (si->off) {
8543        case offsetof(struct __sk_buff, len):
8544                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8545                                      bpf_target_off(struct sk_buff, len, 4,
8546                                                     target_size));
8547                break;
8548
8549        case offsetof(struct __sk_buff, protocol):
8550                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8551                                      bpf_target_off(struct sk_buff, protocol, 2,
8552                                                     target_size));
8553                break;
8554
8555        case offsetof(struct __sk_buff, vlan_proto):
8556                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8557                                      bpf_target_off(struct sk_buff, vlan_proto, 2,
8558                                                     target_size));
8559                break;
8560
8561        case offsetof(struct __sk_buff, priority):
8562                if (type == BPF_WRITE)
8563                        *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8564                                              bpf_target_off(struct sk_buff, priority, 4,
8565                                                             target_size));
8566                else
8567                        *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8568                                              bpf_target_off(struct sk_buff, priority, 4,
8569                                                             target_size));
8570                break;
8571
8572        case offsetof(struct __sk_buff, ingress_ifindex):
8573                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8574                                      bpf_target_off(struct sk_buff, skb_iif, 4,
8575                                                     target_size));
8576                break;
8577
8578        case offsetof(struct __sk_buff, ifindex):
8579                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
8580                                      si->dst_reg, si->src_reg,
8581                                      offsetof(struct sk_buff, dev));
8582                *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
8583                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8584                                      bpf_target_off(struct net_device, ifindex, 4,
8585                                                     target_size));
8586                break;
8587
8588        case offsetof(struct __sk_buff, hash):
8589                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8590                                      bpf_target_off(struct sk_buff, hash, 4,
8591                                                     target_size));
8592                break;
8593
8594        case offsetof(struct __sk_buff, mark):
8595                if (type == BPF_WRITE)
8596                        *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8597                                              bpf_target_off(struct sk_buff, mark, 4,
8598                                                             target_size));
8599                else
8600                        *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8601                                              bpf_target_off(struct sk_buff, mark, 4,
8602                                                             target_size));
8603                break;
8604
8605        case offsetof(struct __sk_buff, pkt_type):
8606                *target_size = 1;
8607                *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
8608                                      PKT_TYPE_OFFSET());
8609                *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
8610#ifdef __BIG_ENDIAN_BITFIELD
8611                *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
8612#endif
8613                break;
8614
8615        case offsetof(struct __sk_buff, queue_mapping):
8616                if (type == BPF_WRITE) {
8617                        *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
8618                        *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
8619                                              bpf_target_off(struct sk_buff,
8620                                                             queue_mapping,
8621                                                             2, target_size));
8622                } else {
8623                        *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8624                                              bpf_target_off(struct sk_buff,
8625                                                             queue_mapping,
8626                                                             2, target_size));
8627                }
8628                break;
8629
8630        case offsetof(struct __sk_buff, vlan_present):
8631                *target_size = 1;
8632                *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
8633                                      PKT_VLAN_PRESENT_OFFSET());
8634                if (PKT_VLAN_PRESENT_BIT)
8635                        *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
8636                if (PKT_VLAN_PRESENT_BIT < 7)
8637                        *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
8638                break;
8639
8640        case offsetof(struct __sk_buff, vlan_tci):
8641                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8642                                      bpf_target_off(struct sk_buff, vlan_tci, 2,
8643                                                     target_size));
8644                break;
8645
8646        case offsetof(struct __sk_buff, cb[0]) ...
8647             offsetofend(struct __sk_buff, cb[4]) - 1:
8648                BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
8649                BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
8650                              offsetof(struct qdisc_skb_cb, data)) %
8651                             sizeof(__u64));
8652
8653                prog->cb_access = 1;
8654                off  = si->off;
8655                off -= offsetof(struct __sk_buff, cb[0]);
8656                off += offsetof(struct sk_buff, cb);
8657                off += offsetof(struct qdisc_skb_cb, data);
8658                if (type == BPF_WRITE)
8659                        *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
8660                                              si->src_reg, off);
8661                else
8662                        *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
8663                                              si->src_reg, off);
8664                break;
8665
8666        case offsetof(struct __sk_buff, tc_classid):
8667                BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);
8668
8669                off  = si->off;
8670                off -= offsetof(struct __sk_buff, tc_classid);
8671                off += offsetof(struct sk_buff, cb);
8672                off += offsetof(struct qdisc_skb_cb, tc_classid);
8673                *target_size = 2;
8674                if (type == BPF_WRITE)
8675                        *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
8676                                              si->src_reg, off);
8677                else
8678                        *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
8679                                              si->src_reg, off);
8680                break;
8681
8682        case offsetof(struct __sk_buff, data):
8683                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
8684                                      si->dst_reg, si->src_reg,
8685                                      offsetof(struct sk_buff, data));
8686                break;
8687
8688        case offsetof(struct __sk_buff, data_meta):
8689                off  = si->off;
8690                off -= offsetof(struct __sk_buff, data_meta);
8691                off += offsetof(struct sk_buff, cb);
8692                off += offsetof(struct bpf_skb_data_end, data_meta);
8693                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
8694                                      si->src_reg, off);
8695                break;
8696
8697        case offsetof(struct __sk_buff, data_end):
8698                off  = si->off;
8699                off -= offsetof(struct __sk_buff, data_end);
8700                off += offsetof(struct sk_buff, cb);
8701                off += offsetof(struct bpf_skb_data_end, data_end);
8702                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
8703                                      si->src_reg, off);
8704                break;
8705
8706        case offsetof(struct __sk_buff, tc_index):
8707#ifdef CONFIG_NET_SCHED
8708                if (type == BPF_WRITE)
8709                        *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
8710                                              bpf_target_off(struct sk_buff, tc_index, 2,
8711                                                             target_size));
8712                else
8713                        *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8714                                              bpf_target_off(struct sk_buff, tc_index, 2,
8715                                                             target_size));
8716#else
8717                *target_size = 2;
8718                if (type == BPF_WRITE)
8719                        *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
8720                else
8721                        *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8722#endif
8723                break;
8724
8725        case offsetof(struct __sk_buff, napi_id):
8726#if defined(CONFIG_NET_RX_BUSY_POLL)
8727                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8728                                      bpf_target_off(struct sk_buff, napi_id, 4,
8729                                                     target_size));
8730                *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
8731                *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8732#else
8733                *target_size = 4;
8734                *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8735#endif
8736                break;
8737        case offsetof(struct __sk_buff, family):
8738                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
8739
8740                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8741                                      si->dst_reg, si->src_reg,
8742                                      offsetof(struct sk_buff, sk));
8743                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8744                                      bpf_target_off(struct sock_common,
8745                                                     skc_family,
8746                                                     2, target_size));
8747                break;
8748        case offsetof(struct __sk_buff, remote_ip4):
8749                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
8750
8751                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8752                                      si->dst_reg, si->src_reg,
8753                                      offsetof(struct sk_buff, sk));
8754                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8755                                      bpf_target_off(struct sock_common,
8756                                                     skc_daddr,
8757                                                     4, target_size));
8758                break;
8759        case offsetof(struct __sk_buff, local_ip4):
8760                BUILD_BUG_ON(sizeof_field(struct sock_common,
8761                                          skc_rcv_saddr) != 4);
8762
8763                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8764                                      si->dst_reg, si->src_reg,
8765                                      offsetof(struct sk_buff, sk));
8766                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8767                                      bpf_target_off(struct sock_common,
8768                                                     skc_rcv_saddr,
8769                                                     4, target_size));
8770                break;
8771        case offsetof(struct __sk_buff, remote_ip6[0]) ...
8772             offsetof(struct __sk_buff, remote_ip6[3]):
8773#if IS_ENABLED(CONFIG_IPV6)
8774                BUILD_BUG_ON(sizeof_field(struct sock_common,
8775                                          skc_v6_daddr.s6_addr32[0]) != 4);
8776
8777                off = si->off;
8778                off -= offsetof(struct __sk_buff, remote_ip6[0]);
8779
8780                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8781                                      si->dst_reg, si->src_reg,
8782                                      offsetof(struct sk_buff, sk));
8783                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8784                                      offsetof(struct sock_common,
8785                                               skc_v6_daddr.s6_addr32[0]) +
8786                                      off);
8787#else
8788                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8789#endif
8790                break;
8791        case offsetof(struct __sk_buff, local_ip6[0]) ...
8792             offsetof(struct __sk_buff, local_ip6[3]):
8793#if IS_ENABLED(CONFIG_IPV6)
8794                BUILD_BUG_ON(sizeof_field(struct sock_common,
8795                                          skc_v6_rcv_saddr.s6_addr32[0]) != 4);
8796
8797                off = si->off;
8798                off -= offsetof(struct __sk_buff, local_ip6[0]);
8799
8800                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8801                                      si->dst_reg, si->src_reg,
8802                                      offsetof(struct sk_buff, sk));
8803                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8804                                      offsetof(struct sock_common,
8805                                               skc_v6_rcv_saddr.s6_addr32[0]) +
8806                                      off);
8807#else
8808                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8809#endif
8810                break;
8811
8812        case offsetof(struct __sk_buff, remote_port):
8813                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
8814
8815                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8816                                      si->dst_reg, si->src_reg,
8817                                      offsetof(struct sk_buff, sk));
8818                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8819                                      bpf_target_off(struct sock_common,
8820                                                     skc_dport,
8821                                                     2, target_size));
8822#ifndef __BIG_ENDIAN_BITFIELD
8823                *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
8824#endif
8825                break;
8826
8827        case offsetof(struct __sk_buff, local_port):
8828                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
8829
8830                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8831                                      si->dst_reg, si->src_reg,
8832                                      offsetof(struct sk_buff, sk));
8833                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8834                                      bpf_target_off(struct sock_common,
8835                                                     skc_num, 2, target_size));
8836                break;
8837
8838        case offsetof(struct __sk_buff, tstamp):
8839                BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
8840
8841                if (type == BPF_WRITE)
8842                        *insn++ = BPF_STX_MEM(BPF_DW,
8843                                              si->dst_reg, si->src_reg,
8844                                              bpf_target_off(struct sk_buff,
8845                                                             tstamp, 8,
8846                                                             target_size));
8847                else
8848                        *insn++ = BPF_LDX_MEM(BPF_DW,
8849                                              si->dst_reg, si->src_reg,
8850                                              bpf_target_off(struct sk_buff,
8851                                                             tstamp, 8,
8852                                                             target_size));
8853                break;
8854
8855        case offsetof(struct __sk_buff, gso_segs):
8856                insn = bpf_convert_shinfo_access(si, insn);
8857                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
8858                                      si->dst_reg, si->dst_reg,
8859                                      bpf_target_off(struct skb_shared_info,
8860                                                     gso_segs, 2,
8861                                                     target_size));
8862                break;
8863        case offsetof(struct __sk_buff, gso_size):
8864                insn = bpf_convert_shinfo_access(si, insn);
8865                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
8866                                      si->dst_reg, si->dst_reg,
8867                                      bpf_target_off(struct skb_shared_info,
8868                                                     gso_size, 2,
8869                                                     target_size));
8870                break;
8871        case offsetof(struct __sk_buff, wire_len):
8872                BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
8873
8874                off = si->off;
8875                off -= offsetof(struct __sk_buff, wire_len);
8876                off += offsetof(struct sk_buff, cb);
8877                off += offsetof(struct qdisc_skb_cb, pkt_len);
8878                *target_size = 4;
8879                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
8880                break;
8881
8882        case offsetof(struct __sk_buff, sk):
8883                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8884                                      si->dst_reg, si->src_reg,
8885                                      offsetof(struct sk_buff, sk));
8886                break;
8887        }
8888
8889        return insn - insn_buf;
8890}
8891
8892u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
8893                                const struct bpf_insn *si,
8894                                struct bpf_insn *insn_buf,
8895                                struct bpf_prog *prog, u32 *target_size)
8896{
8897        struct bpf_insn *insn = insn_buf;
8898        int off;
8899
8900        switch (si->off) {
8901        case offsetof(struct bpf_sock, bound_dev_if):
8902                BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
8903
8904                if (type == BPF_WRITE)
8905                        *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8906                                        offsetof(struct sock, sk_bound_dev_if));
8907                else
8908                        *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8909                                      offsetof(struct sock, sk_bound_dev_if));
8910                break;
8911
8912        case offsetof(struct bpf_sock, mark):
8913                BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
8914
8915                if (type == BPF_WRITE)
8916                        *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8917                                        offsetof(struct sock, sk_mark));
8918                else
8919                        *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8920                                      offsetof(struct sock, sk_mark));
8921                break;
8922
8923        case offsetof(struct bpf_sock, priority):
8924                BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
8925
8926                if (type == BPF_WRITE)
8927                        *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8928                                        offsetof(struct sock, sk_priority));
8929                else
8930                        *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8931                                      offsetof(struct sock, sk_priority));
8932                break;
8933
8934        case offsetof(struct bpf_sock, family):
8935                *insn++ = BPF_LDX_MEM(
8936                        BPF_FIELD_SIZEOF(struct sock_common, skc_family),
8937                        si->dst_reg, si->src_reg,
8938                        bpf_target_off(struct sock_common,
8939                                       skc_family,
8940                                       sizeof_field(struct sock_common,
8941                                                    skc_family),
8942                                       target_size));
8943                break;
8944
8945        case offsetof(struct bpf_sock, type):
8946                *insn++ = BPF_LDX_MEM(
8947                        BPF_FIELD_SIZEOF(struct sock, sk_type),
8948                        si->dst_reg, si->src_reg,
8949                        bpf_target_off(struct sock, sk_type,
8950                                       sizeof_field(struct sock, sk_type),
8951                                       target_size));
8952                break;
8953
8954        case offsetof(struct bpf_sock, protocol):
8955                *insn++ = BPF_LDX_MEM(
8956                        BPF_FIELD_SIZEOF(struct sock, sk_protocol),
8957                        si->dst_reg, si->src_reg,
8958                        bpf_target_off(struct sock, sk_protocol,
8959                                       sizeof_field(struct sock, sk_protocol),
8960                                       target_size));
8961                break;
8962
8963        case offsetof(struct bpf_sock, src_ip4):
8964                *insn++ = BPF_LDX_MEM(
8965                        BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8966                        bpf_target_off(struct sock_common, skc_rcv_saddr,
8967                                       sizeof_field(struct sock_common,
8968                                                    skc_rcv_saddr),
8969                                       target_size));
8970                break;
8971
8972        case offsetof(struct bpf_sock, dst_ip4):
8973                *insn++ = BPF_LDX_MEM(
8974                        BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8975                        bpf_target_off(struct sock_common, skc_daddr,
8976                                       sizeof_field(struct sock_common,
8977                                                    skc_daddr),
8978                                       target_size));
8979                break;
8980
8981        case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
8982#if IS_ENABLED(CONFIG_IPV6)
8983                off = si->off;
8984                off -= offsetof(struct bpf_sock, src_ip6[0]);
8985                *insn++ = BPF_LDX_MEM(
8986                        BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8987                        bpf_target_off(
8988                                struct sock_common,
8989                                skc_v6_rcv_saddr.s6_addr32[0],
8990                                sizeof_field(struct sock_common,
8991                                             skc_v6_rcv_saddr.s6_addr32[0]),
8992                                target_size) + off);
8993#else
8994                (void)off;
8995                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8996#endif
8997                break;
8998
8999        case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
9000#if IS_ENABLED(CONFIG_IPV6)
9001                off = si->off;
9002                off -= offsetof(struct bpf_sock, dst_ip6[0]);
9003                *insn++ = BPF_LDX_MEM(
9004                        BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9005                        bpf_target_off(struct sock_common,
9006                                       skc_v6_daddr.s6_addr32[0],
9007                                       sizeof_field(struct sock_common,
9008                                                    skc_v6_daddr.s6_addr32[0]),
9009                                       target_size) + off);
9010#else
9011                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9012                *target_size = 4;
9013#endif
9014                break;
9015
9016        case offsetof(struct bpf_sock, src_port):
9017                *insn++ = BPF_LDX_MEM(
9018                        BPF_FIELD_SIZEOF(struct sock_common, skc_num),
9019                        si->dst_reg, si->src_reg,
9020                        bpf_target_off(struct sock_common, skc_num,
9021                                       sizeof_field(struct sock_common,
9022                                                    skc_num),
9023                                       target_size));
9024                break;
9025
9026        case offsetof(struct bpf_sock, dst_port):
9027                *insn++ = BPF_LDX_MEM(
9028                        BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
9029                        si->dst_reg, si->src_reg,
9030                        bpf_target_off(struct sock_common, skc_dport,
9031                                       sizeof_field(struct sock_common,
9032                                                    skc_dport),
9033                                       target_size));
9034                break;
9035
9036        case offsetof(struct bpf_sock, state):
9037                *insn++ = BPF_LDX_MEM(
9038                        BPF_FIELD_SIZEOF(struct sock_common, skc_state),
9039                        si->dst_reg, si->src_reg,
9040                        bpf_target_off(struct sock_common, skc_state,
9041                                       sizeof_field(struct sock_common,
9042                                                    skc_state),
9043                                       target_size));
9044                break;
9045        case offsetof(struct bpf_sock, rx_queue_mapping):
9046#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
9047                *insn++ = BPF_LDX_MEM(
9048                        BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
9049                        si->dst_reg, si->src_reg,
9050                        bpf_target_off(struct sock, sk_rx_queue_mapping,
9051                                       sizeof_field(struct sock,
9052                                                    sk_rx_queue_mapping),
9053                                       target_size));
9054                *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
9055                                      1);
9056                *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
9057#else
9058                *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
9059                *target_size = 2;
9060#endif
9061                break;
9062        }
9063
9064        return insn - insn_buf;
9065}
9066
9067static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
9068                                         const struct bpf_insn *si,
9069                                         struct bpf_insn *insn_buf,
9070                                         struct bpf_prog *prog, u32 *target_size)
9071{
9072        struct bpf_insn *insn = insn_buf;
9073
9074        switch (si->off) {
9075        case offsetof(struct __sk_buff, ifindex):
9076                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
9077                                      si->dst_reg, si->src_reg,
9078                                      offsetof(struct sk_buff, dev));
9079                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9080                                      bpf_target_off(struct net_device, ifindex, 4,
9081                                                     target_size));
9082                break;
9083        default:
9084                return bpf_convert_ctx_access(type, si, insn_buf, prog,
9085                                              target_size);
9086        }
9087
9088        return insn - insn_buf;
9089}
9090
9091static u32 xdp_convert_ctx_access(enum bpf_access_type type,
9092                                  const struct bpf_insn *si,
9093                                  struct bpf_insn *insn_buf,
9094                                  struct bpf_prog *prog, u32 *target_size)
9095{
9096        struct bpf_insn *insn = insn_buf;
9097
9098        switch (si->off) {
9099        case offsetof(struct xdp_md, data):
9100                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
9101                                      si->dst_reg, si->src_reg,
9102                                      offsetof(struct xdp_buff, data));
9103                break;
9104        case offsetof(struct xdp_md, data_meta):
9105                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
9106                                      si->dst_reg, si->src_reg,
9107                                      offsetof(struct xdp_buff, data_meta));
9108                break;
9109        case offsetof(struct xdp_md, data_end):
9110                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
9111                                      si->dst_reg, si->src_reg,
9112                                      offsetof(struct xdp_buff, data_end));
9113                break;
9114        case offsetof(struct xdp_md, ingress_ifindex):
9115                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
9116                                      si->dst_reg, si->src_reg,
9117                                      offsetof(struct xdp_buff, rxq));
9118                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
9119                                      si->dst_reg, si->dst_reg,
9120                                      offsetof(struct xdp_rxq_info, dev));
9121                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9122                                      offsetof(struct net_device, ifindex));
9123                break;
9124        case offsetof(struct xdp_md, rx_queue_index):
9125                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
9126                                      si->dst_reg, si->src_reg,
9127                                      offsetof(struct xdp_buff, rxq));
9128                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9129                                      offsetof(struct xdp_rxq_info,
9130                                               queue_index));
9131                break;
9132        case offsetof(struct xdp_md, egress_ifindex):
9133                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
9134                                      si->dst_reg, si->src_reg,
9135                                      offsetof(struct xdp_buff, txq));
9136                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
9137                                      si->dst_reg, si->dst_reg,
9138                                      offsetof(struct xdp_txq_info, dev));
9139                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9140                                      offsetof(struct net_device, ifindex));
9141                break;
9142        }
9143
9144        return insn - insn_buf;
9145}
9146
9147/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
9148 * context Structure, F is Field in context structure that contains a pointer
9149 * to Nested Structure of type NS that has the field NF.
9150 *
9151 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
9152 * sure that SIZE is not greater than actual size of S.F.NF.
9153 *
9154 * If offset OFF is provided, the load happens from that offset relative to
9155 * offset of NF.
9156 */
9157#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)          \
9158        do {                                                                   \
9159                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,     \
9160                                      si->src_reg, offsetof(S, F));            \
9161                *insn++ = BPF_LDX_MEM(                                         \
9162                        SIZE, si->dst_reg, si->dst_reg,                        \
9163                        bpf_target_off(NS, NF, sizeof_field(NS, NF),           \
9164                                       target_size)                            \
9165                                + OFF);                                        \
9166        } while (0)
9167
9168#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)                              \
9169        SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,                     \
9170                                             BPF_FIELD_SIZEOF(NS, NF), 0)
9171
9172/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
9173 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
9174 *
9175 * In addition it uses Temporary Field TF (member of struct S) as the 3rd
9176 * "register" since two registers available in convert_ctx_access are not
9177 * enough: we can't override neither SRC, since it contains value to store, nor
9178 * DST since it contains pointer to context that may be used by later
9179 * instructions. But we need a temporary place to save pointer to nested
9180 * structure whose field we want to store to.
9181 */
9182#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF)          \
9183        do {                                                                   \
9184                int tmp_reg = BPF_REG_9;                                       \
9185                if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)          \
9186                        --tmp_reg;                                             \
9187                if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)          \
9188                        --tmp_reg;                                             \
9189                *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,            \
9190                                      offsetof(S, TF));                        \
9191                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,         \
9192                                      si->dst_reg, offsetof(S, F));            \
9193                *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg,              \
9194                        bpf_target_off(NS, NF, sizeof_field(NS, NF),           \
9195                                       target_size)                            \
9196                                + OFF);                                        \
9197                *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,            \
9198                                      offsetof(S, TF));                        \
9199        } while (0)
9200
9201#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
9202                                                      TF)                      \
9203        do {                                                                   \
9204                if (type == BPF_WRITE) {                                       \
9205                        SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE,   \
9206                                                         OFF, TF);             \
9207                } else {                                                       \
9208                        SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(                  \
9209                                S, NS, F, NF, SIZE, OFF);  \
9210                }                                                              \
9211        } while (0)
9212
9213#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF)                 \
9214        SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(                         \
9215                S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
9216
9217static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
9218                                        const struct bpf_insn *si,
9219                                        struct bpf_insn *insn_buf,
9220                                        struct bpf_prog *prog, u32 *target_size)
9221{
9222        int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
9223        struct bpf_insn *insn = insn_buf;
9224
9225        switch (si->off) {
9226        case offsetof(struct bpf_sock_addr, user_family):
9227                SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9228                                            struct sockaddr, uaddr, sa_family);
9229                break;
9230
9231        case offsetof(struct bpf_sock_addr, user_ip4):
9232                SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9233                        struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
9234                        sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
9235                break;
9236
9237        case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
9238                off = si->off;
9239                off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
9240                SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9241                        struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
9242                        sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
9243                        tmp_reg);
9244                break;
9245
9246        case offsetof(struct bpf_sock_addr, user_port):
9247                /* To get port we need to know sa_family first and then treat
9248                 * sockaddr as either sockaddr_in or sockaddr_in6.
9249                 * Though we can simplify since port field has same offset and
9250                 * size in both structures.
9251                 * Here we check this invariant and use just one of the
9252                 * structures if it's true.
9253                 */
9254                BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
9255                             offsetof(struct sockaddr_in6, sin6_port));
9256                BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
9257                             sizeof_field(struct sockaddr_in6, sin6_port));
9258                /* Account for sin6_port being smaller than user_port. */
9259                port_size = min(port_size, BPF_LDST_BYTES(si));
9260                SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9261                        struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
9262                        sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
9263                break;
9264
9265        case offsetof(struct bpf_sock_addr, family):
9266                SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9267                                            struct sock, sk, sk_family);
9268                break;
9269
9270        case offsetof(struct bpf_sock_addr, type):
9271                SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9272                                            struct sock, sk, sk_type);
9273                break;
9274
9275        case offsetof(struct bpf_sock_addr, protocol):
9276                SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9277                                            struct sock, sk, sk_protocol);
9278                break;
9279
9280        case offsetof(struct bpf_sock_addr, msg_src_ip4):
9281                /* Treat t_ctx as struct in_addr for msg_src_ip4. */
9282                SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9283                        struct bpf_sock_addr_kern, struct in_addr, t_ctx,
9284                        s_addr, BPF_SIZE(si->code), 0, tmp_reg);
9285                break;
9286
9287        case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
9288                                msg_src_ip6[3]):
9289                off = si->off;
9290                off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
9291                /* Treat t_ctx as struct in6_addr for msg_src_ip6. */
9292                SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9293                        struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
9294                        s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
9295                break;
9296        case offsetof(struct bpf_sock_addr, sk):
9297                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
9298                                      si->dst_reg, si->src_reg,
9299                                      offsetof(struct bpf_sock_addr_kern, sk));
9300                break;
9301        }
9302
9303        return insn - insn_buf;
9304}
9305
9306static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
9307                                       const struct bpf_insn *si,
9308                                       struct bpf_insn *insn_buf,
9309                                       struct bpf_prog *prog,
9310                                       u32 *target_size)
9311{
9312        struct bpf_insn *insn = insn_buf;
9313        int off;
9314
9315/* Helper macro for adding read access to tcp_sock or sock fields. */
9316#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                         \
9317        do {                                                                  \
9318                int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2;     \
9319                BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) >                   \
9320                             sizeof_field(struct bpf_sock_ops, BPF_FIELD));   \
9321                if (si->dst_reg == reg || si->src_reg == reg)                 \
9322                        reg--;                                                \
9323                if (si->dst_reg == reg || si->src_reg == reg)                 \
9324                        reg--;                                                \
9325                if (si->dst_reg == si->src_reg) {                             \
9326                        *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,       \
9327                                          offsetof(struct bpf_sock_ops_kern,  \
9328                                          temp));                             \
9329                        fullsock_reg = reg;                                   \
9330                        jmp += 2;                                             \
9331                }                                                             \
9332                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
9333                                                struct bpf_sock_ops_kern,     \
9334                                                is_fullsock),                 \
9335                                      fullsock_reg, si->src_reg,              \
9336                                      offsetof(struct bpf_sock_ops_kern,      \
9337                                               is_fullsock));                 \
9338                *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);         \
9339                if (si->dst_reg == si->src_reg)                               \
9340                        *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,       \
9341                                      offsetof(struct bpf_sock_ops_kern,      \
9342                                      temp));                                 \
9343                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
9344                                                struct bpf_sock_ops_kern, sk),\
9345                                      si->dst_reg, si->src_reg,               \
9346                                      offsetof(struct bpf_sock_ops_kern, sk));\
9347                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,                   \
9348                                                       OBJ_FIELD),            \
9349                                      si->dst_reg, si->dst_reg,               \
9350                                      offsetof(OBJ, OBJ_FIELD));              \
9351                if (si->dst_reg == si->src_reg) {                             \
9352                        *insn++ = BPF_JMP_A(1);                               \
9353                        *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,       \
9354                                      offsetof(struct bpf_sock_ops_kern,      \
9355                                      temp));                                 \
9356                }                                                             \
9357        } while (0)
9358
9359#define SOCK_OPS_GET_SK()                                                             \
9360        do {                                                                  \
9361                int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1;     \
9362                if (si->dst_reg == reg || si->src_reg == reg)                 \
9363                        reg--;                                                \
9364                if (si->dst_reg == reg || si->src_reg == reg)                 \
9365                        reg--;                                                \
9366                if (si->dst_reg == si->src_reg) {                             \
9367                        *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,       \
9368                                          offsetof(struct bpf_sock_ops_kern,  \
9369                                          temp));                             \
9370                        fullsock_reg = reg;                                   \
9371                        jmp += 2;                                             \
9372                }                                                             \
9373                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
9374                                                struct bpf_sock_ops_kern,     \
9375                                                is_fullsock),                 \
9376                                      fullsock_reg, si->src_reg,              \
9377                                      offsetof(struct bpf_sock_ops_kern,      \
9378                                               is_fullsock));                 \
9379                *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);         \
9380                if (si->dst_reg == si->src_reg)                               \
9381                        *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,       \
9382                                      offsetof(struct bpf_sock_ops_kern,      \
9383                                      temp));                                 \
9384                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
9385                                                struct bpf_sock_ops_kern, sk),\
9386                                      si->dst_reg, si->src_reg,               \
9387                                      offsetof(struct bpf_sock_ops_kern, sk));\
9388                if (si->dst_reg == si->src_reg) {                             \
9389                        *insn++ = BPF_JMP_A(1);                               \
9390                        *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,       \
9391                                      offsetof(struct bpf_sock_ops_kern,      \
9392                                      temp));                                 \
9393                }                                                             \
9394        } while (0)
9395
9396#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
9397                SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
9398
9399/* Helper macro for adding write access to tcp_sock or sock fields.
9400 * The macro is called with two registers, dst_reg which contains a pointer
9401 * to ctx (context) and src_reg which contains the value that should be
9402 * stored. However, we need an additional register since we cannot overwrite
9403 * dst_reg because it may be used later in the program.
9404 * Instead we "borrow" one of the other register. We first save its value
9405 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
9406 * it at the end of the macro.
9407 */
9408#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                         \
9409        do {                                                                  \
9410                int reg = BPF_REG_9;                                          \
9411                BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) >                   \
9412                             sizeof_field(struct bpf_sock_ops, BPF_FIELD));   \
9413                if (si->dst_reg == reg || si->src_reg == reg)                 \
9414                        reg--;                                                \
9415                if (si->dst_reg == reg || si->src_reg == reg)                 \
9416                        reg--;                                                \
9417                *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,               \
9418                                      offsetof(struct bpf_sock_ops_kern,      \
9419                                               temp));                        \
9420                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
9421                                                struct bpf_sock_ops_kern,     \
9422                                                is_fullsock),                 \
9423                                      reg, si->dst_reg,                       \
9424                                      offsetof(struct bpf_sock_ops_kern,      \
9425                                               is_fullsock));                 \
9426                *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);                    \
9427                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
9428                                                struct bpf_sock_ops_kern, sk),\
9429                                      reg, si->dst_reg,                       \
9430                                      offsetof(struct bpf_sock_ops_kern, sk));\
9431                *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),       \
9432                                      reg, si->src_reg,                       \
9433                                      offsetof(OBJ, OBJ_FIELD));              \
9434                *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,               \
9435                                      offsetof(struct bpf_sock_ops_kern,      \
9436                                               temp));                        \
9437        } while (0)
9438
9439#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)            \
9440        do {                                                                  \
9441                if (TYPE == BPF_WRITE)                                        \
9442                        SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
9443                else                                                          \
9444                        SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
9445        } while (0)
9446
9447        if (insn > insn_buf)
9448                return insn - insn_buf;
9449
9450        switch (si->off) {
9451        case offsetof(struct bpf_sock_ops, op):
9452                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9453                                                       op),
9454                                      si->dst_reg, si->src_reg,
9455                                      offsetof(struct bpf_sock_ops_kern, op));
9456                break;
9457
9458        case offsetof(struct bpf_sock_ops, replylong[0]) ...
9459             offsetof(struct bpf_sock_ops, replylong[3]):
9460                BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
9461                             sizeof_field(struct bpf_sock_ops_kern, reply));
9462                BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
9463                             sizeof_field(struct bpf_sock_ops_kern, replylong));
9464                off = si->off;
9465                off -= offsetof(struct bpf_sock_ops, replylong[0]);
9466                off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
9467                if (type == BPF_WRITE)
9468                        *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
9469                                              off);
9470                else
9471                        *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9472                                              off);
9473                break;
9474
9475        case offsetof(struct bpf_sock_ops, family):
9476                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
9477
9478                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9479                                              struct bpf_sock_ops_kern, sk),
9480                                      si->dst_reg, si->src_reg,
9481                                      offsetof(struct bpf_sock_ops_kern, sk));
9482                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9483                                      offsetof(struct sock_common, skc_family));
9484                break;
9485
9486        case offsetof(struct bpf_sock_ops, remote_ip4):
9487                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
9488
9489                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9490                                                struct bpf_sock_ops_kern, sk),
9491                                      si->dst_reg, si->src_reg,
9492                                      offsetof(struct bpf_sock_ops_kern, sk));
9493                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9494                                      offsetof(struct sock_common, skc_daddr));
9495                break;
9496
9497        case offsetof(struct bpf_sock_ops, local_ip4):
9498                BUILD_BUG_ON(sizeof_field(struct sock_common,
9499                                          skc_rcv_saddr) != 4);
9500
9501                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9502                                              struct bpf_sock_ops_kern, sk),
9503                                      si->dst_reg, si->src_reg,
9504                                      offsetof(struct bpf_sock_ops_kern, sk));
9505                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9506                                      offsetof(struct sock_common,
9507                                               skc_rcv_saddr));
9508                break;
9509
9510        case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
9511             offsetof(struct bpf_sock_ops, remote_ip6[3]):
9512#if IS_ENABLED(CONFIG_IPV6)
9513                BUILD_BUG_ON(sizeof_field(struct sock_common,
9514                                          skc_v6_daddr.s6_addr32[0]) != 4);
9515
9516                off = si->off;
9517                off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
9518                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9519                                                struct bpf_sock_ops_kern, sk),
9520                                      si->dst_reg, si->src_reg,
9521                                      offsetof(struct bpf_sock_ops_kern, sk));
9522                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9523                                      offsetof(struct sock_common,
9524                                               skc_v6_daddr.s6_addr32[0]) +
9525                                      off);
9526#else
9527                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9528#endif
9529                break;
9530
9531        case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
9532             offsetof(struct bpf_sock_ops, local_ip6[3]):
9533#if IS_ENABLED(CONFIG_IPV6)
9534                BUILD_BUG_ON(sizeof_field(struct sock_common,
9535                                          skc_v6_rcv_saddr.s6_addr32[0]) != 4);
9536
9537                off = si->off;
9538                off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
9539                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9540                                                struct bpf_sock_ops_kern, sk),
9541                                      si->dst_reg, si->src_reg,
9542                                      offsetof(struct bpf_sock_ops_kern, sk));
9543                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9544                                      offsetof(struct sock_common,
9545                                               skc_v6_rcv_saddr.s6_addr32[0]) +
9546                                      off);
9547#else
9548                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9549#endif
9550                break;
9551
9552        case offsetof(struct bpf_sock_ops, remote_port):
9553                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
9554
9555                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9556                                                struct bpf_sock_ops_kern, sk),
9557                                      si->dst_reg, si->src_reg,
9558                                      offsetof(struct bpf_sock_ops_kern, sk));
9559                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9560                                      offsetof(struct sock_common, skc_dport));
9561#ifndef __BIG_ENDIAN_BITFIELD
9562                *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
9563#endif
9564                break;
9565
9566        case offsetof(struct bpf_sock_ops, local_port):
9567                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
9568
9569                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9570                                                struct bpf_sock_ops_kern, sk),
9571                                      si->dst_reg, si->src_reg,
9572                                      offsetof(struct bpf_sock_ops_kern, sk));
9573                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9574                                      offsetof(struct sock_common, skc_num));
9575                break;
9576
9577        case offsetof(struct bpf_sock_ops, is_fullsock):
9578                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9579                                                struct bpf_sock_ops_kern,
9580                                                is_fullsock),
9581                                      si->dst_reg, si->src_reg,
9582                                      offsetof(struct bpf_sock_ops_kern,
9583                                               is_fullsock));
9584                break;
9585
9586        case offsetof(struct bpf_sock_ops, state):
9587                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
9588
9589                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9590                                                struct bpf_sock_ops_kern, sk),
9591                                      si->dst_reg, si->src_reg,
9592                                      offsetof(struct bpf_sock_ops_kern, sk));
9593                *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
9594                                      offsetof(struct sock_common, skc_state));
9595                break;
9596
9597        case offsetof(struct bpf_sock_ops, rtt_min):
9598                BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
9599                             sizeof(struct minmax));
9600                BUILD_BUG_ON(sizeof(struct minmax) <
9601                             sizeof(struct minmax_sample));
9602
9603                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9604                                                struct bpf_sock_ops_kern, sk),
9605                                      si->dst_reg, si->src_reg,
9606                                      offsetof(struct bpf_sock_ops_kern, sk));
9607                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9608                                      offsetof(struct tcp_sock, rtt_min) +
9609                                      sizeof_field(struct minmax_sample, t));
9610                break;
9611
9612        case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
9613                SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
9614                                   struct tcp_sock);
9615                break;
9616
9617        case offsetof(struct bpf_sock_ops, sk_txhash):
9618                SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
9619                                          struct sock, type);
9620                break;
9621        case offsetof(struct bpf_sock_ops, snd_cwnd):
9622                SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
9623                break;
9624        case offsetof(struct bpf_sock_ops, srtt_us):
9625                SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
9626                break;
9627        case offsetof(struct bpf_sock_ops, snd_ssthresh):
9628                SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
9629                break;
9630        case offsetof(struct bpf_sock_ops, rcv_nxt):
9631                SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
9632                break;
9633        case offsetof(struct bpf_sock_ops, snd_nxt):
9634                SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
9635                break;
9636        case offsetof(struct bpf_sock_ops, snd_una):
9637                SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
9638                break;
9639        case offsetof(struct bpf_sock_ops, mss_cache):
9640                SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
9641                break;
9642        case offsetof(struct bpf_sock_ops, ecn_flags):
9643                SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
9644                break;
9645        case offsetof(struct bpf_sock_ops, rate_delivered):
9646                SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
9647                break;
9648        case offsetof(struct bpf_sock_ops, rate_interval_us):
9649                SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
9650                break;
9651        case offsetof(struct bpf_sock_ops, packets_out):
9652                SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
9653                break;
9654        case offsetof(struct bpf_sock_ops, retrans_out):
9655                SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
9656                break;
9657        case offsetof(struct bpf_sock_ops, total_retrans):
9658                SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
9659                break;
9660        case offsetof(struct bpf_sock_ops, segs_in):
9661                SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
9662                break;
9663        case offsetof(struct bpf_sock_ops, data_segs_in):
9664                SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
9665                break;
9666        case offsetof(struct bpf_sock_ops, segs_out):
9667                SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
9668                break;
9669        case offsetof(struct bpf_sock_ops, data_segs_out):
9670                SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
9671                break;
9672        case offsetof(struct bpf_sock_ops, lost_out):
9673                SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
9674                break;
9675        case offsetof(struct bpf_sock_ops, sacked_out):
9676                SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
9677                break;
9678        case offsetof(struct bpf_sock_ops, bytes_received):
9679                SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
9680                break;
9681        case offsetof(struct bpf_sock_ops, bytes_acked):
9682                SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
9683                break;
9684        case offsetof(struct bpf_sock_ops, sk):
9685                SOCK_OPS_GET_SK();
9686                break;
9687        case offsetof(struct bpf_sock_ops, skb_data_end):
9688                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9689                                                       skb_data_end),
9690                                      si->dst_reg, si->src_reg,
9691                                      offsetof(struct bpf_sock_ops_kern,
9692                                               skb_data_end));
9693                break;
9694        case offsetof(struct bpf_sock_ops, skb_data):
9695                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9696                                                       skb),
9697                                      si->dst_reg, si->src_reg,
9698                                      offsetof(struct bpf_sock_ops_kern,
9699                                               skb));
9700                *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9701                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
9702                                      si->dst_reg, si->dst_reg,
9703                                      offsetof(struct sk_buff, data));
9704                break;
9705        case offsetof(struct bpf_sock_ops, skb_len):
9706                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9707                                                       skb),
9708                                      si->dst_reg, si->src_reg,
9709                                      offsetof(struct bpf_sock_ops_kern,
9710                                               skb));
9711                *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9712                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
9713                                      si->dst_reg, si->dst_reg,
9714                                      offsetof(struct sk_buff, len));
9715                break;
9716        case offsetof(struct bpf_sock_ops, skb_tcp_flags):
9717                off = offsetof(struct sk_buff, cb);
9718                off += offsetof(struct tcp_skb_cb, tcp_flags);
9719                *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
9720                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9721                                                       skb),
9722                                      si->dst_reg, si->src_reg,
9723                                      offsetof(struct bpf_sock_ops_kern,
9724                                               skb));
9725                *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9726                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
9727                                                       tcp_flags),
9728                                      si->dst_reg, si->dst_reg, off);
9729                break;
9730        }
9731        return insn - insn_buf;
9732}
9733
9734/* data_end = skb->data + skb_headlen() */
9735static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
9736                                                    struct bpf_insn *insn)
9737{
9738        /* si->dst_reg = skb->data */
9739        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
9740                              si->dst_reg, si->src_reg,
9741                              offsetof(struct sk_buff, data));
9742        /* AX = skb->len */
9743        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
9744                              BPF_REG_AX, si->src_reg,
9745                              offsetof(struct sk_buff, len));
9746        /* si->dst_reg = skb->data + skb->len */
9747        *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
9748        /* AX = skb->data_len */
9749        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
9750                              BPF_REG_AX, si->src_reg,
9751                              offsetof(struct sk_buff, data_len));
9752        /* si->dst_reg = skb->data + skb->len - skb->data_len */
9753        *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
9754
9755        return insn;
9756}
9757
9758static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
9759                                     const struct bpf_insn *si,
9760                                     struct bpf_insn *insn_buf,
9761                                     struct bpf_prog *prog, u32 *target_size)
9762{
9763        struct bpf_insn *insn = insn_buf;
9764
9765        switch (si->off) {
9766        case offsetof(struct __sk_buff, data_end):
9767                insn = bpf_convert_data_end_access(si, insn);
9768                break;
9769        default:
9770                return bpf_convert_ctx_access(type, si, insn_buf, prog,
9771                                              target_size);
9772        }
9773
9774        return insn - insn_buf;
9775}
9776
9777static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
9778                                     const struct bpf_insn *si,
9779                                     struct bpf_insn *insn_buf,
9780                                     struct bpf_prog *prog, u32 *target_size)
9781{
9782        struct bpf_insn *insn = insn_buf;
9783#if IS_ENABLED(CONFIG_IPV6)
9784        int off;
9785#endif
9786
9787        /* convert ctx uses the fact sg element is first in struct */
9788        BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
9789
9790        switch (si->off) {
9791        case offsetof(struct sk_msg_md, data):
9792                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
9793                                      si->dst_reg, si->src_reg,
9794                                      offsetof(struct sk_msg, data));
9795                break;
9796        case offsetof(struct sk_msg_md, data_end):
9797                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
9798                                      si->dst_reg, si->src_reg,
9799                                      offsetof(struct sk_msg, data_end));
9800                break;
9801        case offsetof(struct sk_msg_md, family):
9802                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
9803
9804                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9805                                              struct sk_msg, sk),
9806                                      si->dst_reg, si->src_reg,
9807                                      offsetof(struct sk_msg, sk));
9808                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9809                                      offsetof(struct sock_common, skc_family));
9810                break;
9811
9812        case offsetof(struct sk_msg_md, remote_ip4):
9813                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
9814
9815                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9816                                                struct sk_msg, sk),
9817                                      si->dst_reg, si->src_reg,
9818                                      offsetof(struct sk_msg, sk));
9819                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9820                                      offsetof(struct sock_common, skc_daddr));
9821                break;
9822
9823        case offsetof(struct sk_msg_md, local_ip4):
9824                BUILD_BUG_ON(sizeof_field(struct sock_common,
9825                                          skc_rcv_saddr) != 4);
9826
9827                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9828                                              struct sk_msg, sk),
9829                                      si->dst_reg, si->src_reg,
9830                                      offsetof(struct sk_msg, sk));
9831                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9832                                      offsetof(struct sock_common,
9833                                               skc_rcv_saddr));
9834                break;
9835
9836        case offsetof(struct sk_msg_md, remote_ip6[0]) ...
9837             offsetof(struct sk_msg_md, remote_ip6[3]):
9838#if IS_ENABLED(CONFIG_IPV6)
9839                BUILD_BUG_ON(sizeof_field(struct sock_common,
9840                                          skc_v6_daddr.s6_addr32[0]) != 4);
9841
9842                off = si->off;
9843                off -= offsetof(struct sk_msg_md, remote_ip6[0]);
9844                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9845                                                struct sk_msg, sk),
9846                                      si->dst_reg, si->src_reg,
9847                                      offsetof(struct sk_msg, sk));
9848                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9849                                      offsetof(struct sock_common,
9850                                               skc_v6_daddr.s6_addr32[0]) +
9851                                      off);
9852#else
9853                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9854#endif
9855                break;
9856
9857        case offsetof(struct sk_msg_md, local_ip6[0]) ...
9858             offsetof(struct sk_msg_md, local_ip6[3]):
9859#if IS_ENABLED(CONFIG_IPV6)
9860                BUILD_BUG_ON(sizeof_field(struct sock_common,
9861                                          skc_v6_rcv_saddr.s6_addr32[0]) != 4);
9862
9863                off = si->off;
9864                off -= offsetof(struct sk_msg_md, local_ip6[0]);
9865                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9866                                                struct sk_msg, sk),
9867                                      si->dst_reg, si->src_reg,
9868                                      offsetof(struct sk_msg, sk));
9869                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9870                                      offsetof(struct sock_common,
9871                                               skc_v6_rcv_saddr.s6_addr32[0]) +
9872                                      off);
9873#else
9874                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9875#endif
9876                break;
9877
9878        case offsetof(struct sk_msg_md, remote_port):
9879                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
9880
9881                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9882                                                struct sk_msg, sk),
9883                                      si->dst_reg, si->src_reg,
9884                                      offsetof(struct sk_msg, sk));
9885                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9886                                      offsetof(struct sock_common, skc_dport));
9887#ifndef __BIG_ENDIAN_BITFIELD
9888                *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
9889#endif
9890                break;
9891
9892        case offsetof(struct sk_msg_md, local_port):
9893                BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
9894
9895                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9896                                                struct sk_msg, sk),
9897                                      si->dst_reg, si->src_reg,
9898                                      offsetof(struct sk_msg, sk));
9899                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9900                                      offsetof(struct sock_common, skc_num));
9901                break;
9902
9903        case offsetof(struct sk_msg_md, size):
9904                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
9905                                      si->dst_reg, si->src_reg,
9906                                      offsetof(struct sk_msg_sg, size));
9907                break;
9908
9909        case offsetof(struct sk_msg_md, sk):
9910                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
9911                                      si->dst_reg, si->src_reg,
9912                                      offsetof(struct sk_msg, sk));
9913                break;
9914        }
9915
9916        return insn - insn_buf;
9917}
9918
9919const struct bpf_verifier_ops sk_filter_verifier_ops = {
9920        .get_func_proto         = sk_filter_func_proto,
9921        .is_valid_access        = sk_filter_is_valid_access,
9922        .convert_ctx_access     = bpf_convert_ctx_access,
9923        .gen_ld_abs             = bpf_gen_ld_abs,
9924};
9925
9926const struct bpf_prog_ops sk_filter_prog_ops = {
9927        .test_run               = bpf_prog_test_run_skb,
9928};
9929
9930const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
9931        .get_func_proto         = tc_cls_act_func_proto,
9932        .is_valid_access        = tc_cls_act_is_valid_access,
9933        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
9934        .gen_prologue           = tc_cls_act_prologue,
9935        .gen_ld_abs             = bpf_gen_ld_abs,
9936        .check_kfunc_call       = bpf_prog_test_check_kfunc_call,
9937};
9938
9939const struct bpf_prog_ops tc_cls_act_prog_ops = {
9940        .test_run               = bpf_prog_test_run_skb,
9941};
9942
9943const struct bpf_verifier_ops xdp_verifier_ops = {
9944        .get_func_proto         = xdp_func_proto,
9945        .is_valid_access        = xdp_is_valid_access,
9946        .convert_ctx_access     = xdp_convert_ctx_access,
9947        .gen_prologue           = bpf_noop_prologue,
9948};
9949
9950const struct bpf_prog_ops xdp_prog_ops = {
9951        .test_run               = bpf_prog_test_run_xdp,
9952};
9953
9954const struct bpf_verifier_ops cg_skb_verifier_ops = {
9955        .get_func_proto         = cg_skb_func_proto,
9956        .is_valid_access        = cg_skb_is_valid_access,
9957        .convert_ctx_access     = bpf_convert_ctx_access,
9958};
9959
9960const struct bpf_prog_ops cg_skb_prog_ops = {
9961        .test_run               = bpf_prog_test_run_skb,
9962};
9963
9964const struct bpf_verifier_ops lwt_in_verifier_ops = {
9965        .get_func_proto         = lwt_in_func_proto,
9966        .is_valid_access        = lwt_is_valid_access,
9967        .convert_ctx_access     = bpf_convert_ctx_access,
9968};
9969
9970const struct bpf_prog_ops lwt_in_prog_ops = {
9971        .test_run               = bpf_prog_test_run_skb,
9972};
9973
9974const struct bpf_verifier_ops lwt_out_verifier_ops = {
9975        .get_func_proto         = lwt_out_func_proto,
9976        .is_valid_access        = lwt_is_valid_access,
9977        .convert_ctx_access     = bpf_convert_ctx_access,
9978};
9979
9980const struct bpf_prog_ops lwt_out_prog_ops = {
9981        .test_run               = bpf_prog_test_run_skb,
9982};
9983
9984const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
9985        .get_func_proto         = lwt_xmit_func_proto,
9986        .is_valid_access        = lwt_is_valid_access,
9987        .convert_ctx_access     = bpf_convert_ctx_access,
9988        .gen_prologue           = tc_cls_act_prologue,
9989};
9990
9991const struct bpf_prog_ops lwt_xmit_prog_ops = {
9992        .test_run               = bpf_prog_test_run_skb,
9993};
9994
9995const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
9996        .get_func_proto         = lwt_seg6local_func_proto,
9997        .is_valid_access        = lwt_is_valid_access,
9998        .convert_ctx_access     = bpf_convert_ctx_access,
9999};
10000
10001const struct bpf_prog_ops lwt_seg6local_prog_ops = {
10002        .test_run               = bpf_prog_test_run_skb,
10003};
10004
10005const struct bpf_verifier_ops cg_sock_verifier_ops = {
10006        .get_func_proto         = sock_filter_func_proto,
10007        .is_valid_access        = sock_filter_is_valid_access,
10008        .convert_ctx_access     = bpf_sock_convert_ctx_access,
10009};
10010
10011const struct bpf_prog_ops cg_sock_prog_ops = {
10012};
10013
10014const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
10015        .get_func_proto         = sock_addr_func_proto,
10016        .is_valid_access        = sock_addr_is_valid_access,
10017        .convert_ctx_access     = sock_addr_convert_ctx_access,
10018};
10019
10020const struct bpf_prog_ops cg_sock_addr_prog_ops = {
10021};
10022
10023const struct bpf_verifier_ops sock_ops_verifier_ops = {
10024        .get_func_proto         = sock_ops_func_proto,
10025        .is_valid_access        = sock_ops_is_valid_access,
10026        .convert_ctx_access     = sock_ops_convert_ctx_access,
10027};
10028
10029const struct bpf_prog_ops sock_ops_prog_ops = {
10030};
10031
10032const struct bpf_verifier_ops sk_skb_verifier_ops = {
10033        .get_func_proto         = sk_skb_func_proto,
10034        .is_valid_access        = sk_skb_is_valid_access,
10035        .convert_ctx_access     = sk_skb_convert_ctx_access,
10036        .gen_prologue           = sk_skb_prologue,
10037};
10038
10039const struct bpf_prog_ops sk_skb_prog_ops = {
10040};
10041
10042const struct bpf_verifier_ops sk_msg_verifier_ops = {
10043        .get_func_proto         = sk_msg_func_proto,
10044        .is_valid_access        = sk_msg_is_valid_access,
10045        .convert_ctx_access     = sk_msg_convert_ctx_access,
10046        .gen_prologue           = bpf_noop_prologue,
10047};
10048
10049const struct bpf_prog_ops sk_msg_prog_ops = {
10050};
10051
10052const struct bpf_verifier_ops flow_dissector_verifier_ops = {
10053        .get_func_proto         = flow_dissector_func_proto,
10054        .is_valid_access        = flow_dissector_is_valid_access,
10055        .convert_ctx_access     = flow_dissector_convert_ctx_access,
10056};
10057
10058const struct bpf_prog_ops flow_dissector_prog_ops = {
10059        .test_run               = bpf_prog_test_run_flow_dissector,
10060};
10061
10062int sk_detach_filter(struct sock *sk)
10063{
10064        int ret = -ENOENT;
10065        struct sk_filter *filter;
10066
10067        if (sock_flag(sk, SOCK_FILTER_LOCKED))
10068                return -EPERM;
10069
10070        filter = rcu_dereference_protected(sk->sk_filter,
10071                                           lockdep_sock_is_held(sk));
10072        if (filter) {
10073                RCU_INIT_POINTER(sk->sk_filter, NULL);
10074                sk_filter_uncharge(sk, filter);
10075                ret = 0;
10076        }
10077
10078        return ret;
10079}
10080EXPORT_SYMBOL_GPL(sk_detach_filter);
10081
10082int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
10083                  unsigned int len)
10084{
10085        struct sock_fprog_kern *fprog;
10086        struct sk_filter *filter;
10087        int ret = 0;
10088
10089        lock_sock(sk);
10090        filter = rcu_dereference_protected(sk->sk_filter,
10091                                           lockdep_sock_is_held(sk));
10092        if (!filter)
10093                goto out;
10094
10095        /* We're copying the filter that has been originally attached,
10096         * so no conversion/decode needed anymore. eBPF programs that
10097         * have no original program cannot be dumped through this.
10098         */
10099        ret = -EACCES;
10100        fprog = filter->prog->orig_prog;
10101        if (!fprog)
10102                goto out;
10103
10104        ret = fprog->len;
10105        if (!len)
10106                /* User space only enquires number of filter blocks. */
10107                goto out;
10108
10109        ret = -EINVAL;
10110        if (len < fprog->len)
10111                goto out;
10112
10113        ret = -EFAULT;
10114        if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
10115                goto out;
10116
10117        /* Instead of bytes, the API requests to return the number
10118         * of filter blocks.
10119         */
10120        ret = fprog->len;
10121out:
10122        release_sock(sk);
10123        return ret;
10124}
10125
10126#ifdef CONFIG_INET
10127static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
10128                                    struct sock_reuseport *reuse,
10129                                    struct sock *sk, struct sk_buff *skb,
10130                                    struct sock *migrating_sk,
10131                                    u32 hash)
10132{
10133        reuse_kern->skb = skb;
10134        reuse_kern->sk = sk;
10135        reuse_kern->selected_sk = NULL;
10136        reuse_kern->migrating_sk = migrating_sk;
10137        reuse_kern->data_end = skb->data + skb_headlen(skb);
10138        reuse_kern->hash = hash;
10139        reuse_kern->reuseport_id = reuse->reuseport_id;
10140        reuse_kern->bind_inany = reuse->bind_inany;
10141}
10142
10143struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
10144                                  struct bpf_prog *prog, struct sk_buff *skb,
10145                                  struct sock *migrating_sk,
10146                                  u32 hash)
10147{
10148        struct sk_reuseport_kern reuse_kern;
10149        enum sk_action action;
10150
10151        bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
10152        action = bpf_prog_run(prog, &reuse_kern);
10153
10154        if (action == SK_PASS)
10155                return reuse_kern.selected_sk;
10156        else
10157                return ERR_PTR(-ECONNREFUSED);
10158}
10159
10160BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
10161           struct bpf_map *, map, void *, key, u32, flags)
10162{
10163        bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
10164        struct sock_reuseport *reuse;
10165        struct sock *selected_sk;
10166
10167        selected_sk = map->ops->map_lookup_elem(map, key);
10168        if (!selected_sk)
10169                return -ENOENT;
10170
10171        reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
10172        if (!reuse) {
10173                /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
10174                if (sk_is_refcounted(selected_sk))
10175                        sock_put(selected_sk);
10176
10177                /* reuseport_array has only sk with non NULL sk_reuseport_cb.
10178                 * The only (!reuse) case here is - the sk has already been
10179                 * unhashed (e.g. by close()), so treat it as -ENOENT.
10180                 *
10181                 * Other maps (e.g. sock_map) do not provide this guarantee and
10182                 * the sk may never be in the reuseport group to begin with.
10183                 */
10184                return is_sockarray ? -ENOENT : -EINVAL;
10185        }
10186
10187        if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
10188                struct sock *sk = reuse_kern->sk;
10189
10190                if (sk->sk_protocol != selected_sk->sk_protocol)
10191                        return -EPROTOTYPE;
10192                else if (sk->sk_family != selected_sk->sk_family)
10193                        return -EAFNOSUPPORT;
10194
10195                /* Catch all. Likely bound to a different sockaddr. */
10196                return -EBADFD;
10197        }
10198
10199        reuse_kern->selected_sk = selected_sk;
10200
10201        return 0;
10202}
10203
10204static const struct bpf_func_proto sk_select_reuseport_proto = {
10205        .func           = sk_select_reuseport,
10206        .gpl_only       = false,
10207        .ret_type       = RET_INTEGER,
10208        .arg1_type      = ARG_PTR_TO_CTX,
10209        .arg2_type      = ARG_CONST_MAP_PTR,
10210        .arg3_type      = ARG_PTR_TO_MAP_KEY,
10211        .arg4_type      = ARG_ANYTHING,
10212};
10213
10214BPF_CALL_4(sk_reuseport_load_bytes,
10215           const struct sk_reuseport_kern *, reuse_kern, u32, offset,
10216           void *, to, u32, len)
10217{
10218        return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
10219}
10220
10221static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
10222        .func           = sk_reuseport_load_bytes,
10223        .gpl_only       = false,
10224        .ret_type       = RET_INTEGER,
10225        .arg1_type      = ARG_PTR_TO_CTX,
10226        .arg2_type      = ARG_ANYTHING,
10227        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
10228        .arg4_type      = ARG_CONST_SIZE,
10229};
10230
10231BPF_CALL_5(sk_reuseport_load_bytes_relative,
10232           const struct sk_reuseport_kern *, reuse_kern, u32, offset,
10233           void *, to, u32, len, u32, start_header)
10234{
10235        return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
10236                                               len, start_header);
10237}
10238
10239static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
10240        .func           = sk_reuseport_load_bytes_relative,
10241        .gpl_only       = false,
10242        .ret_type       = RET_INTEGER,
10243        .arg1_type      = ARG_PTR_TO_CTX,
10244        .arg2_type      = ARG_ANYTHING,
10245        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
10246        .arg4_type      = ARG_CONST_SIZE,
10247        .arg5_type      = ARG_ANYTHING,
10248};
10249
10250static const struct bpf_func_proto *
10251sk_reuseport_func_proto(enum bpf_func_id func_id,
10252                        const struct bpf_prog *prog)
10253{
10254        switch (func_id) {
10255        case BPF_FUNC_sk_select_reuseport:
10256                return &sk_select_reuseport_proto;
10257        case BPF_FUNC_skb_load_bytes:
10258                return &sk_reuseport_load_bytes_proto;
10259        case BPF_FUNC_skb_load_bytes_relative:
10260                return &sk_reuseport_load_bytes_relative_proto;
10261        case BPF_FUNC_get_socket_cookie:
10262                return &bpf_get_socket_ptr_cookie_proto;
10263        default:
10264                return bpf_base_func_proto(func_id);
10265        }
10266}
10267
10268static bool
10269sk_reuseport_is_valid_access(int off, int size,
10270                             enum bpf_access_type type,
10271                             const struct bpf_prog *prog,
10272                             struct bpf_insn_access_aux *info)
10273{
10274        const u32 size_default = sizeof(__u32);
10275
10276        if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
10277            off % size || type != BPF_READ)
10278                return false;
10279
10280        switch (off) {
10281        case offsetof(struct sk_reuseport_md, data):
10282                info->reg_type = PTR_TO_PACKET;
10283                return size == sizeof(__u64);
10284
10285        case offsetof(struct sk_reuseport_md, data_end):
10286                info->reg_type = PTR_TO_PACKET_END;
10287                return size == sizeof(__u64);
10288
10289        case offsetof(struct sk_reuseport_md, hash):
10290                return size == size_default;
10291
10292        case offsetof(struct sk_reuseport_md, sk):
10293                info->reg_type = PTR_TO_SOCKET;
10294                return size == sizeof(__u64);
10295
10296        case offsetof(struct sk_reuseport_md, migrating_sk):
10297                info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
10298                return size == sizeof(__u64);
10299
10300        /* Fields that allow narrowing */
10301        case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
10302                if (size < sizeof_field(struct sk_buff, protocol))
10303                        return false;
10304                fallthrough;
10305        case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
10306        case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
10307        case bpf_ctx_range(struct sk_reuseport_md, len):
10308                bpf_ctx_record_field_size(info, size_default);
10309                return bpf_ctx_narrow_access_ok(off, size, size_default);
10310
10311        default:
10312                return false;
10313        }
10314}
10315
10316#define SK_REUSEPORT_LOAD_FIELD(F) ({                                   \
10317        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
10318                              si->dst_reg, si->src_reg,                 \
10319                              bpf_target_off(struct sk_reuseport_kern, F, \
10320                                             sizeof_field(struct sk_reuseport_kern, F), \
10321                                             target_size));             \
10322        })
10323
10324#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)                          \
10325        SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,           \
10326                                    struct sk_buff,                     \
10327                                    skb,                                \
10328                                    SKB_FIELD)
10329
10330#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD)                            \
10331        SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,           \
10332                                    struct sock,                        \
10333                                    sk,                                 \
10334                                    SK_FIELD)
10335
10336static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
10337                                           const struct bpf_insn *si,
10338                                           struct bpf_insn *insn_buf,
10339                                           struct bpf_prog *prog,
10340                                           u32 *target_size)
10341{
10342        struct bpf_insn *insn = insn_buf;
10343
10344        switch (si->off) {
10345        case offsetof(struct sk_reuseport_md, data):
10346                SK_REUSEPORT_LOAD_SKB_FIELD(data);
10347                break;
10348
10349        case offsetof(struct sk_reuseport_md, len):
10350                SK_REUSEPORT_LOAD_SKB_FIELD(len);
10351                break;
10352
10353        case offsetof(struct sk_reuseport_md, eth_protocol):
10354                SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
10355                break;
10356
10357        case offsetof(struct sk_reuseport_md, ip_protocol):
10358                SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
10359                break;
10360
10361        case offsetof(struct sk_reuseport_md, data_end):
10362                SK_REUSEPORT_LOAD_FIELD(data_end);
10363                break;
10364
10365        case offsetof(struct sk_reuseport_md, hash):
10366                SK_REUSEPORT_LOAD_FIELD(hash);
10367                break;
10368
10369        case offsetof(struct sk_reuseport_md, bind_inany):
10370                SK_REUSEPORT_LOAD_FIELD(bind_inany);
10371                break;
10372
10373        case offsetof(struct sk_reuseport_md, sk):
10374                SK_REUSEPORT_LOAD_FIELD(sk);
10375                break;
10376
10377        case offsetof(struct sk_reuseport_md, migrating_sk):
10378                SK_REUSEPORT_LOAD_FIELD(migrating_sk);
10379                break;
10380        }
10381
10382        return insn - insn_buf;
10383}
10384
10385const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
10386        .get_func_proto         = sk_reuseport_func_proto,
10387        .is_valid_access        = sk_reuseport_is_valid_access,
10388        .convert_ctx_access     = sk_reuseport_convert_ctx_access,
10389};
10390
10391const struct bpf_prog_ops sk_reuseport_prog_ops = {
10392};
10393
10394DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
10395EXPORT_SYMBOL(bpf_sk_lookup_enabled);
10396
10397BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
10398           struct sock *, sk, u64, flags)
10399{
10400        if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
10401                               BPF_SK_LOOKUP_F_NO_REUSEPORT)))
10402                return -EINVAL;
10403        if (unlikely(sk && sk_is_refcounted(sk)))
10404                return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
10405        if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
10406                return -ESOCKTNOSUPPORT; /* reject connected sockets */
10407
10408        /* Check if socket is suitable for packet L3/L4 protocol */
10409        if (sk && sk->sk_protocol != ctx->protocol)
10410                return -EPROTOTYPE;
10411        if (sk && sk->sk_family != ctx->family &&
10412            (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
10413                return -EAFNOSUPPORT;
10414
10415        if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
10416                return -EEXIST;
10417
10418        /* Select socket as lookup result */
10419        ctx->selected_sk = sk;
10420        ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
10421        return 0;
10422}
10423
10424static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
10425        .func           = bpf_sk_lookup_assign,
10426        .gpl_only       = false,
10427        .ret_type       = RET_INTEGER,
10428        .arg1_type      = ARG_PTR_TO_CTX,
10429        .arg2_type      = ARG_PTR_TO_SOCKET_OR_NULL,
10430        .arg3_type      = ARG_ANYTHING,
10431};
10432
10433static const struct bpf_func_proto *
10434sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
10435{
10436        switch (func_id) {
10437        case BPF_FUNC_perf_event_output:
10438                return &bpf_event_output_data_proto;
10439        case BPF_FUNC_sk_assign:
10440                return &bpf_sk_lookup_assign_proto;
10441        case BPF_FUNC_sk_release:
10442                return &bpf_sk_release_proto;
10443        default:
10444                return bpf_sk_base_func_proto(func_id);
10445        }
10446}
10447
10448static bool sk_lookup_is_valid_access(int off, int size,
10449                                      enum bpf_access_type type,
10450                                      const struct bpf_prog *prog,
10451                                      struct bpf_insn_access_aux *info)
10452{
10453        if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
10454                return false;
10455        if (off % size != 0)
10456                return false;
10457        if (type != BPF_READ)
10458                return false;
10459
10460        switch (off) {
10461        case offsetof(struct bpf_sk_lookup, sk):
10462                info->reg_type = PTR_TO_SOCKET_OR_NULL;
10463                return size == sizeof(__u64);
10464
10465        case bpf_ctx_range(struct bpf_sk_lookup, family):
10466        case bpf_ctx_range(struct bpf_sk_lookup, protocol):
10467        case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
10468        case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
10469        case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
10470        case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
10471        case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
10472        case bpf_ctx_range(struct bpf_sk_lookup, local_port):
10473                bpf_ctx_record_field_size(info, sizeof(__u32));
10474                return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
10475
10476        default:
10477                return false;
10478        }
10479}
10480
10481static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
10482                                        const struct bpf_insn *si,
10483                                        struct bpf_insn *insn_buf,
10484                                        struct bpf_prog *prog,
10485                                        u32 *target_size)
10486{
10487        struct bpf_insn *insn = insn_buf;
10488
10489        switch (si->off) {
10490        case offsetof(struct bpf_sk_lookup, sk):
10491                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
10492                                      offsetof(struct bpf_sk_lookup_kern, selected_sk));
10493                break;
10494
10495        case offsetof(struct bpf_sk_lookup, family):
10496                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10497                                      bpf_target_off(struct bpf_sk_lookup_kern,
10498                                                     family, 2, target_size));
10499                break;
10500
10501        case offsetof(struct bpf_sk_lookup, protocol):
10502                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10503                                      bpf_target_off(struct bpf_sk_lookup_kern,
10504                                                     protocol, 2, target_size));
10505                break;
10506
10507        case offsetof(struct bpf_sk_lookup, remote_ip4):
10508                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10509                                      bpf_target_off(struct bpf_sk_lookup_kern,
10510                                                     v4.saddr, 4, target_size));
10511                break;
10512
10513        case offsetof(struct bpf_sk_lookup, local_ip4):
10514                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10515                                      bpf_target_off(struct bpf_sk_lookup_kern,
10516                                                     v4.daddr, 4, target_size));
10517                break;
10518
10519        case bpf_ctx_range_till(struct bpf_sk_lookup,
10520                                remote_ip6[0], remote_ip6[3]): {
10521#if IS_ENABLED(CONFIG_IPV6)
10522                int off = si->off;
10523
10524                off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
10525                off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
10526                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
10527                                      offsetof(struct bpf_sk_lookup_kern, v6.saddr));
10528                *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10529                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
10530#else
10531                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10532#endif
10533                break;
10534        }
10535        case bpf_ctx_range_till(struct bpf_sk_lookup,
10536                                local_ip6[0], local_ip6[3]): {
10537#if IS_ENABLED(CONFIG_IPV6)
10538                int off = si->off;
10539
10540                off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
10541                off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
10542                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
10543                                      offsetof(struct bpf_sk_lookup_kern, v6.daddr));
10544                *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10545                *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
10546#else
10547                *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10548#endif
10549                break;
10550        }
10551        case offsetof(struct bpf_sk_lookup, remote_port):
10552                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10553                                      bpf_target_off(struct bpf_sk_lookup_kern,
10554                                                     sport, 2, target_size));
10555                break;
10556
10557        case offsetof(struct bpf_sk_lookup, local_port):
10558                *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10559                                      bpf_target_off(struct bpf_sk_lookup_kern,
10560                                                     dport, 2, target_size));
10561                break;
10562        }
10563
10564        return insn - insn_buf;
10565}
10566
10567const struct bpf_prog_ops sk_lookup_prog_ops = {
10568        .test_run = bpf_prog_test_run_sk_lookup,
10569};
10570
10571const struct bpf_verifier_ops sk_lookup_verifier_ops = {
10572        .get_func_proto         = sk_lookup_func_proto,
10573        .is_valid_access        = sk_lookup_is_valid_access,
10574        .convert_ctx_access     = sk_lookup_convert_ctx_access,
10575};
10576
10577#endif /* CONFIG_INET */
10578
10579DEFINE_BPF_DISPATCHER(xdp)
10580
10581void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
10582{
10583        bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
10584}
10585
10586#ifdef CONFIG_DEBUG_INFO_BTF
10587BTF_ID_LIST_GLOBAL(btf_sock_ids)
10588#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
10589BTF_SOCK_TYPE_xxx
10590#undef BTF_SOCK_TYPE
10591#else
10592u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
10593#endif
10594
10595BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
10596{
10597        /* tcp6_sock type is not generated in dwarf and hence btf,
10598         * trigger an explicit type generation here.
10599         */
10600        BTF_TYPE_EMIT(struct tcp6_sock);
10601        if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
10602            sk->sk_family == AF_INET6)
10603                return (unsigned long)sk;
10604
10605        return (unsigned long)NULL;
10606}
10607
10608const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
10609        .func                   = bpf_skc_to_tcp6_sock,
10610        .gpl_only               = false,
10611        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
10612        .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10613        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
10614};
10615
10616BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
10617{
10618        if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
10619                return (unsigned long)sk;
10620
10621        return (unsigned long)NULL;
10622}
10623
10624const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
10625        .func                   = bpf_skc_to_tcp_sock,
10626        .gpl_only               = false,
10627        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
10628        .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10629        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
10630};
10631
10632BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
10633{
10634        /* BTF types for tcp_timewait_sock and inet_timewait_sock are not
10635         * generated if CONFIG_INET=n. Trigger an explicit generation here.
10636         */
10637        BTF_TYPE_EMIT(struct inet_timewait_sock);
10638        BTF_TYPE_EMIT(struct tcp_timewait_sock);
10639
10640#ifdef CONFIG_INET
10641        if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
10642                return (unsigned long)sk;
10643#endif
10644
10645#if IS_BUILTIN(CONFIG_IPV6)
10646        if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
10647                return (unsigned long)sk;
10648#endif
10649
10650        return (unsigned long)NULL;
10651}
10652
10653const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
10654        .func                   = bpf_skc_to_tcp_timewait_sock,
10655        .gpl_only               = false,
10656        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
10657        .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10658        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
10659};
10660
10661BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
10662{
10663#ifdef CONFIG_INET
10664        if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
10665                return (unsigned long)sk;
10666#endif
10667
10668#if IS_BUILTIN(CONFIG_IPV6)
10669        if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
10670                return (unsigned long)sk;
10671#endif
10672
10673        return (unsigned long)NULL;
10674}
10675
10676const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
10677        .func                   = bpf_skc_to_tcp_request_sock,
10678        .gpl_only               = false,
10679        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
10680        .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10681        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
10682};
10683
10684BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
10685{
10686        /* udp6_sock type is not generated in dwarf and hence btf,
10687         * trigger an explicit type generation here.
10688         */
10689        BTF_TYPE_EMIT(struct udp6_sock);
10690        if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
10691            sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
10692                return (unsigned long)sk;
10693
10694        return (unsigned long)NULL;
10695}
10696
10697const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
10698        .func                   = bpf_skc_to_udp6_sock,
10699        .gpl_only               = false,
10700        .ret_type               = RET_PTR_TO_BTF_ID_OR_NULL,
10701        .arg1_type              = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10702        .ret_btf_id             = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
10703};
10704
10705BPF_CALL_1(bpf_sock_from_file, struct file *, file)
10706{
10707        return (unsigned long)sock_from_file(file);
10708}
10709
10710BTF_ID_LIST(bpf_sock_from_file_btf_ids)
10711BTF_ID(struct, socket)
10712BTF_ID(struct, file)
10713
10714const struct bpf_func_proto bpf_sock_from_file_proto = {
10715        .func           = bpf_sock_from_file,
10716        .gpl_only       = false,
10717        .ret_type       = RET_PTR_TO_BTF_ID_OR_NULL,
10718        .ret_btf_id     = &bpf_sock_from_file_btf_ids[0],
10719        .arg1_type      = ARG_PTR_TO_BTF_ID,
10720        .arg1_btf_id    = &bpf_sock_from_file_btf_ids[1],
10721};
10722
10723static const struct bpf_func_proto *
10724bpf_sk_base_func_proto(enum bpf_func_id func_id)
10725{
10726        const struct bpf_func_proto *func;
10727
10728        switch (func_id) {
10729        case BPF_FUNC_skc_to_tcp6_sock:
10730                func = &bpf_skc_to_tcp6_sock_proto;
10731                break;
10732        case BPF_FUNC_skc_to_tcp_sock:
10733                func = &bpf_skc_to_tcp_sock_proto;
10734                break;
10735        case BPF_FUNC_skc_to_tcp_timewait_sock:
10736                func = &bpf_skc_to_tcp_timewait_sock_proto;
10737                break;
10738        case BPF_FUNC_skc_to_tcp_request_sock:
10739                func = &bpf_skc_to_tcp_request_sock_proto;
10740                break;
10741        case BPF_FUNC_skc_to_udp6_sock:
10742                func = &bpf_skc_to_udp6_sock_proto;
10743                break;
10744        default:
10745                return bpf_base_func_proto(func_id);
10746        }
10747
10748        if (!perfmon_capable())
10749                return NULL;
10750
10751        return func;
10752}
10753