// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
/* Source path: linux/tools/testing/selftests/bpf/progs/test_sock_fields.c */

   4#include <linux/bpf.h>
   5#include <netinet/in.h>
   6#include <stdbool.h>
   7
   8#include <bpf/bpf_helpers.h>
   9#include <bpf/bpf_endian.h>
  10#include "bpf_tcp_helpers.h"
  11
  12enum bpf_linum_array_idx {
  13        EGRESS_LINUM_IDX,
  14        INGRESS_LINUM_IDX,
  15        READ_SK_DST_PORT_LINUM_IDX,
  16        __NR_BPF_LINUM_ARRAY_IDX,
  17};
  18
  19struct {
  20        __uint(type, BPF_MAP_TYPE_ARRAY);
  21        __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
  22        __type(key, __u32);
  23        __type(value, __u32);
  24} linum_map SEC(".maps");
  25
  26struct bpf_spinlock_cnt {
  27        struct bpf_spin_lock lock;
  28        __u32 cnt;
  29};
  30
  31struct {
  32        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
  33        __uint(map_flags, BPF_F_NO_PREALLOC);
  34        __type(key, int);
  35        __type(value, struct bpf_spinlock_cnt);
  36} sk_pkt_out_cnt SEC(".maps");
  37
  38struct {
  39        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
  40        __uint(map_flags, BPF_F_NO_PREALLOC);
  41        __type(key, int);
  42        __type(value, struct bpf_spinlock_cnt);
  43} sk_pkt_out_cnt10 SEC(".maps");
  44
  45struct bpf_tcp_sock listen_tp = {};
  46struct sockaddr_in6 srv_sa6 = {};
  47struct bpf_tcp_sock cli_tp = {};
  48struct bpf_tcp_sock srv_tp = {};
  49struct bpf_sock listen_sk = {};
  50struct bpf_sock srv_sk = {};
  51struct bpf_sock cli_sk = {};
  52__u64 parent_cg_id = 0;
  53__u64 child_cg_id = 0;
  54__u64 lsndtime = 0;
  55
  56static bool is_loopback6(__u32 *a6)
  57{
  58        return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
  59}
  60
  61static void skcpy(struct bpf_sock *dst,
  62                  const struct bpf_sock *src)
  63{
  64        dst->bound_dev_if = src->bound_dev_if;
  65        dst->family = src->family;
  66        dst->type = src->type;
  67        dst->protocol = src->protocol;
  68        dst->mark = src->mark;
  69        dst->priority = src->priority;
  70        dst->src_ip4 = src->src_ip4;
  71        dst->src_ip6[0] = src->src_ip6[0];
  72        dst->src_ip6[1] = src->src_ip6[1];
  73        dst->src_ip6[2] = src->src_ip6[2];
  74        dst->src_ip6[3] = src->src_ip6[3];
  75        dst->src_port = src->src_port;
  76        dst->dst_ip4 = src->dst_ip4;
  77        dst->dst_ip6[0] = src->dst_ip6[0];
  78        dst->dst_ip6[1] = src->dst_ip6[1];
  79        dst->dst_ip6[2] = src->dst_ip6[2];
  80        dst->dst_ip6[3] = src->dst_ip6[3];
  81        dst->dst_port = src->dst_port;
  82        dst->state = src->state;
  83}
  84
  85static void tpcpy(struct bpf_tcp_sock *dst,
  86                  const struct bpf_tcp_sock *src)
  87{
  88        dst->snd_cwnd = src->snd_cwnd;
  89        dst->srtt_us = src->srtt_us;
  90        dst->rtt_min = src->rtt_min;
  91        dst->snd_ssthresh = src->snd_ssthresh;
  92        dst->rcv_nxt = src->rcv_nxt;
  93        dst->snd_nxt = src->snd_nxt;
  94        dst->snd_una = src->snd_una;
  95        dst->mss_cache = src->mss_cache;
  96        dst->ecn_flags = src->ecn_flags;
  97        dst->rate_delivered = src->rate_delivered;
  98        dst->rate_interval_us = src->rate_interval_us;
  99        dst->packets_out = src->packets_out;
 100        dst->retrans_out = src->retrans_out;
 101        dst->total_retrans = src->total_retrans;
 102        dst->segs_in = src->segs_in;
 103        dst->data_segs_in = src->data_segs_in;
 104        dst->segs_out = src->segs_out;
 105        dst->data_segs_out = src->data_segs_out;
 106        dst->lost_out = src->lost_out;
 107        dst->sacked_out = src->sacked_out;
 108        dst->bytes_received = src->bytes_received;
 109        dst->bytes_acked = src->bytes_acked;
 110}
 111
 112/* Always return CG_OK so that no pkt will be filtered out */
 113#define CG_OK 1
 114
 115#define RET_LOG() ({                                            \
 116        linum = __LINE__;                                       \
 117        bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);   \
 118        return CG_OK;                                           \
 119})
 120
 121SEC("cgroup_skb/egress")
 122int egress_read_sock_fields(struct __sk_buff *skb)
 123{
 124        struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
 125        struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
 126        struct bpf_tcp_sock *tp, *tp_ret;
 127        struct bpf_sock *sk, *sk_ret;
 128        __u32 linum, linum_idx;
 129        struct tcp_sock *ktp;
 130
 131        linum_idx = EGRESS_LINUM_IDX;
 132
 133        sk = skb->sk;
 134        if (!sk)
 135                RET_LOG();
 136
 137        /* Not testing the egress traffic or the listening socket,
 138         * which are covered by the cgroup_skb/ingress test program.
 139         */
 140        if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
 141            sk->state == BPF_TCP_LISTEN)
 142                return CG_OK;
 143
 144        if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
 145                /* Server socket */
 146                sk_ret = &srv_sk;
 147                tp_ret = &srv_tp;
 148        } else if (sk->dst_port == srv_sa6.sin6_port) {
 149                /* Client socket */
 150                sk_ret = &cli_sk;
 151                tp_ret = &cli_tp;
 152        } else {
 153                /* Not the testing egress traffic */
 154                return CG_OK;
 155        }
 156
 157        /* It must be a fullsock for cgroup_skb/egress prog */
 158        sk = bpf_sk_fullsock(sk);
 159        if (!sk)
 160                RET_LOG();
 161
 162        /* Not the testing egress traffic */
 163        if (sk->protocol != IPPROTO_TCP)
 164                return CG_OK;
 165
 166        tp = bpf_tcp_sock(sk);
 167        if (!tp)
 168                RET_LOG();
 169
 170        skcpy(sk_ret, sk);
 171        tpcpy(tp_ret, tp);
 172
 173        if (sk_ret == &srv_sk) {
 174                ktp = bpf_skc_to_tcp_sock(sk);
 175
 176                if (!ktp)
 177                        RET_LOG();
 178
 179                lsndtime = ktp->lsndtime;
 180
 181                child_cg_id = bpf_sk_cgroup_id(ktp);
 182                if (!child_cg_id)
 183                        RET_LOG();
 184
 185                parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
 186                if (!parent_cg_id)
 187                        RET_LOG();
 188
 189                /* The userspace has created it for srv sk */
 190                pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
 191                pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
 192                                                   0, 0);
 193        } else {
 194                pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
 195                                                 &cli_cnt_init,
 196                                                 BPF_SK_STORAGE_GET_F_CREATE);
 197                pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
 198                                                   sk, &cli_cnt_init,
 199                                                   BPF_SK_STORAGE_GET_F_CREATE);
 200        }
 201
 202        if (!pkt_out_cnt || !pkt_out_cnt10)
 203                RET_LOG();
 204
 205        /* Even both cnt and cnt10 have lock defined in their BTF,
 206         * intentionally one cnt takes lock while one does not
 207         * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
 208         */
 209        pkt_out_cnt->cnt += 1;
 210        bpf_spin_lock(&pkt_out_cnt10->lock);
 211        pkt_out_cnt10->cnt += 10;
 212        bpf_spin_unlock(&pkt_out_cnt10->lock);
 213
 214        return CG_OK;
 215}
 216
 217SEC("cgroup_skb/ingress")
 218int ingress_read_sock_fields(struct __sk_buff *skb)
 219{
 220        struct bpf_tcp_sock *tp;
 221        __u32 linum, linum_idx;
 222        struct bpf_sock *sk;
 223
 224        linum_idx = INGRESS_LINUM_IDX;
 225
 226        sk = skb->sk;
 227        if (!sk)
 228                RET_LOG();
 229
 230        /* Not the testing ingress traffic to the server */
 231        if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
 232            sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
 233                return CG_OK;
 234
 235        /* Only interested in the listening socket */
 236        if (sk->state != BPF_TCP_LISTEN)
 237                return CG_OK;
 238
 239        /* It must be a fullsock for cgroup_skb/ingress prog */
 240        sk = bpf_sk_fullsock(sk);
 241        if (!sk)
 242                RET_LOG();
 243
 244        tp = bpf_tcp_sock(sk);
 245        if (!tp)
 246                RET_LOG();
 247
 248        skcpy(&listen_sk, sk);
 249        tpcpy(&listen_tp, tp);
 250
 251        return CG_OK;
 252}
 253
 254/*
 255 * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
 256 * gets rewritten by the access converter to a 2-byte load for
 257 * backward compatibility. Treating the load result as a be16 value
 258 * makes the code portable across little- and big-endian platforms.
 259 */
 260static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
 261{
 262        __u32 *word = (__u32 *)&sk->dst_port;
 263        return word[0] == bpf_htons(0xcafe);
 264}
 265
 266static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
 267{
 268        __u16 *half = (__u16 *)&sk->dst_port;
 269        return half[0] == bpf_htons(0xcafe);
 270}
 271
 272static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
 273{
 274        __u8 *byte = (__u8 *)&sk->dst_port;
 275        return byte[0] == 0xca && byte[1] == 0xfe;
 276}
 277
 278SEC("cgroup_skb/egress")
 279int read_sk_dst_port(struct __sk_buff *skb)
 280{
 281        __u32 linum, linum_idx;
 282        struct bpf_sock *sk;
 283
 284        linum_idx = READ_SK_DST_PORT_LINUM_IDX;
 285
 286        sk = skb->sk;
 287        if (!sk)
 288                RET_LOG();
 289
 290        /* Ignore everything but the SYN from the client socket */
 291        if (sk->state != BPF_TCP_SYN_SENT)
 292                return CG_OK;
 293
 294        if (!sk_dst_port__load_word(sk))
 295                RET_LOG();
 296        if (!sk_dst_port__load_half(sk))
 297                RET_LOG();
 298        if (!sk_dst_port__load_byte(sk))
 299                RET_LOG();
 300
 301        return CG_OK;
 302}
 303
 304char _license[] SEC("license") = "GPL";
 305