linux/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2020 Facebook */
   3
   4#include <stddef.h>
   5#include <errno.h>
   6#include <stdbool.h>
   7#include <sys/types.h>
   8#include <sys/socket.h>
   9#include <linux/tcp.h>
  10#include <linux/socket.h>
  11#include <linux/bpf.h>
  12#include <linux/types.h>
  13#include <bpf/bpf_helpers.h>
  14#include <bpf/bpf_endian.h>
  15#define BPF_PROG_TEST_TCP_HDR_OPTIONS
  16#include "test_tcp_hdr_options.h"
  17
  18#ifndef sizeof_field
  19#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
  20#endif
  21
  22__u8 test_kind = TCPOPT_EXP;
  23__u16 test_magic = 0xeB9F;
  24__u32 inherit_cb_flags = 0;
  25
  26struct bpf_test_option passive_synack_out = {};
  27struct bpf_test_option passive_fin_out  = {};
  28
  29struct bpf_test_option passive_estab_in = {};
  30struct bpf_test_option passive_fin_in   = {};
  31
  32struct bpf_test_option active_syn_out   = {};
  33struct bpf_test_option active_fin_out   = {};
  34
  35struct bpf_test_option active_estab_in  = {};
  36struct bpf_test_option active_fin_in    = {};
  37
  38struct {
  39        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
  40        __uint(map_flags, BPF_F_NO_PREALLOC);
  41        __type(key, int);
  42        __type(value, struct hdr_stg);
  43} hdr_stg_map SEC(".maps");
  44
  45static bool skops_want_cookie(const struct bpf_sock_ops *skops)
  46{
  47        return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
  48}
  49
  50static bool skops_current_mss(const struct bpf_sock_ops *skops)
  51{
  52        return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
  53}
  54
  55static __u8 option_total_len(__u8 flags)
  56{
  57        __u8 i, len = 1; /* +1 for flags */
  58
  59        if (!flags)
  60                return 0;
  61
  62        /* RESEND bit does not use a byte */
  63        for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
  64                len += !!TEST_OPTION_FLAGS(flags, i);
  65
  66        if (test_kind == TCPOPT_EXP)
  67                return len + TCP_BPF_EXPOPT_BASE_LEN;
  68        else
  69                return len + 2; /* +1 kind, +1 kind-len */
  70}
  71
  72static void write_test_option(const struct bpf_test_option *test_opt,
  73                              __u8 *data)
  74{
  75        __u8 offset = 0;
  76
  77        data[offset++] = test_opt->flags;
  78        if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
  79                data[offset++] = test_opt->max_delack_ms;
  80
  81        if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
  82                data[offset++] = test_opt->rand;
  83}
  84
  85static int store_option(struct bpf_sock_ops *skops,
  86                        const struct bpf_test_option *test_opt)
  87{
  88        union {
  89                struct tcp_exprm_opt exprm;
  90                struct tcp_opt regular;
  91        } write_opt;
  92        int err;
  93
  94        if (test_kind == TCPOPT_EXP) {
  95                write_opt.exprm.kind = TCPOPT_EXP;
  96                write_opt.exprm.len = option_total_len(test_opt->flags);
  97                write_opt.exprm.magic = __bpf_htons(test_magic);
  98                write_opt.exprm.data32 = 0;
  99                write_test_option(test_opt, write_opt.exprm.data);
 100                err = bpf_store_hdr_opt(skops, &write_opt.exprm,
 101                                        sizeof(write_opt.exprm), 0);
 102        } else {
 103                write_opt.regular.kind = test_kind;
 104                write_opt.regular.len = option_total_len(test_opt->flags);
 105                write_opt.regular.data32 = 0;
 106                write_test_option(test_opt, write_opt.regular.data);
 107                err = bpf_store_hdr_opt(skops, &write_opt.regular,
 108                                        sizeof(write_opt.regular), 0);
 109        }
 110
 111        if (err)
 112                RET_CG_ERR(err);
 113
 114        return CG_OK;
 115}
 116
 117static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
 118{
 119        opt->flags = *start++;
 120
 121        if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
 122                opt->max_delack_ms = *start++;
 123
 124        if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
 125                opt->rand = *start++;
 126
 127        return 0;
 128}
 129
 130static int load_option(struct bpf_sock_ops *skops,
 131                       struct bpf_test_option *test_opt, bool from_syn)
 132{
 133        union {
 134                struct tcp_exprm_opt exprm;
 135                struct tcp_opt regular;
 136        } search_opt;
 137        int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
 138
 139        if (test_kind == TCPOPT_EXP) {
 140                search_opt.exprm.kind = TCPOPT_EXP;
 141                search_opt.exprm.len = 4;
 142                search_opt.exprm.magic = __bpf_htons(test_magic);
 143                search_opt.exprm.data32 = 0;
 144                ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
 145                                       sizeof(search_opt.exprm), load_flags);
 146                if (ret < 0)
 147                        return ret;
 148                return parse_test_option(test_opt, search_opt.exprm.data);
 149        } else {
 150                search_opt.regular.kind = test_kind;
 151                search_opt.regular.len = 0;
 152                search_opt.regular.data32 = 0;
 153                ret = bpf_load_hdr_opt(skops, &search_opt.regular,
 154                                       sizeof(search_opt.regular), load_flags);
 155                if (ret < 0)
 156                        return ret;
 157                return parse_test_option(test_opt, search_opt.regular.data);
 158        }
 159}
 160
 161static int synack_opt_len(struct bpf_sock_ops *skops)
 162{
 163        struct bpf_test_option test_opt = {};
 164        __u8 optlen;
 165        int err;
 166
 167        if (!passive_synack_out.flags)
 168                return CG_OK;
 169
 170        err = load_option(skops, &test_opt, true);
 171
 172        /* bpf_test_option is not found */
 173        if (err == -ENOMSG)
 174                return CG_OK;
 175
 176        if (err)
 177                RET_CG_ERR(err);
 178
 179        optlen = option_total_len(passive_synack_out.flags);
 180        if (optlen) {
 181                err = bpf_reserve_hdr_opt(skops, optlen, 0);
 182                if (err)
 183                        RET_CG_ERR(err);
 184        }
 185
 186        return CG_OK;
 187}
 188
 189static int write_synack_opt(struct bpf_sock_ops *skops)
 190{
 191        struct bpf_test_option opt;
 192
 193        if (!passive_synack_out.flags)
 194                /* We should not even be called since no header
 195                 * space has been reserved.
 196                 */
 197                RET_CG_ERR(0);
 198
 199        opt = passive_synack_out;
 200        if (skops_want_cookie(skops))
 201                SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
 202
 203        return store_option(skops, &opt);
 204}
 205
 206static int syn_opt_len(struct bpf_sock_ops *skops)
 207{
 208        __u8 optlen;
 209        int err;
 210
 211        if (!active_syn_out.flags)
 212                return CG_OK;
 213
 214        optlen = option_total_len(active_syn_out.flags);
 215        if (optlen) {
 216                err = bpf_reserve_hdr_opt(skops, optlen, 0);
 217                if (err)
 218                        RET_CG_ERR(err);
 219        }
 220
 221        return CG_OK;
 222}
 223
 224static int write_syn_opt(struct bpf_sock_ops *skops)
 225{
 226        if (!active_syn_out.flags)
 227                RET_CG_ERR(0);
 228
 229        return store_option(skops, &active_syn_out);
 230}
 231
 232static int fin_opt_len(struct bpf_sock_ops *skops)
 233{
 234        struct bpf_test_option *opt;
 235        struct hdr_stg *hdr_stg;
 236        __u8 optlen;
 237        int err;
 238
 239        if (!skops->sk)
 240                RET_CG_ERR(0);
 241
 242        hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
 243        if (!hdr_stg)
 244                RET_CG_ERR(0);
 245
 246        if (hdr_stg->active)
 247                opt = &active_fin_out;
 248        else
 249                opt = &passive_fin_out;
 250
 251        optlen = option_total_len(opt->flags);
 252        if (optlen) {
 253                err = bpf_reserve_hdr_opt(skops, optlen, 0);
 254                if (err)
 255                        RET_CG_ERR(err);
 256        }
 257
 258        return CG_OK;
 259}
 260
 261static int write_fin_opt(struct bpf_sock_ops *skops)
 262{
 263        struct bpf_test_option *opt;
 264        struct hdr_stg *hdr_stg;
 265
 266        if (!skops->sk)
 267                RET_CG_ERR(0);
 268
 269        hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
 270        if (!hdr_stg)
 271                RET_CG_ERR(0);
 272
 273        if (hdr_stg->active)
 274                opt = &active_fin_out;
 275        else
 276                opt = &passive_fin_out;
 277
 278        if (!opt->flags)
 279                RET_CG_ERR(0);
 280
 281        return store_option(skops, opt);
 282}
 283
 284static int resend_in_ack(struct bpf_sock_ops *skops)
 285{
 286        struct hdr_stg *hdr_stg;
 287
 288        if (!skops->sk)
 289                return -1;
 290
 291        hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
 292        if (!hdr_stg)
 293                return -1;
 294
 295        return !!hdr_stg->resend_syn;
 296}
 297
 298static int nodata_opt_len(struct bpf_sock_ops *skops)
 299{
 300        int resend;
 301
 302        resend = resend_in_ack(skops);
 303        if (resend < 0)
 304                RET_CG_ERR(0);
 305
 306        if (resend)
 307                return syn_opt_len(skops);
 308
 309        return CG_OK;
 310}
 311
 312static int write_nodata_opt(struct bpf_sock_ops *skops)
 313{
 314        int resend;
 315
 316        resend = resend_in_ack(skops);
 317        if (resend < 0)
 318                RET_CG_ERR(0);
 319
 320        if (resend)
 321                return write_syn_opt(skops);
 322
 323        return CG_OK;
 324}
 325
 326static int data_opt_len(struct bpf_sock_ops *skops)
 327{
 328        /* Same as the nodata version.  Mostly to show
 329         * an example usage on skops->skb_len.
 330         */
 331        return nodata_opt_len(skops);
 332}
 333
 334static int write_data_opt(struct bpf_sock_ops *skops)
 335{
 336        return write_nodata_opt(skops);
 337}
 338
 339static int current_mss_opt_len(struct bpf_sock_ops *skops)
 340{
 341        /* Reserve maximum that may be needed */
 342        int err;
 343
 344        err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
 345        if (err)
 346                RET_CG_ERR(err);
 347
 348        return CG_OK;
 349}
 350
 351static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
 352{
 353        __u8 tcp_flags = skops_tcp_flags(skops);
 354
 355        if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
 356                return synack_opt_len(skops);
 357
 358        if (tcp_flags & TCPHDR_SYN)
 359                return syn_opt_len(skops);
 360
 361        if (tcp_flags & TCPHDR_FIN)
 362                return fin_opt_len(skops);
 363
 364        if (skops_current_mss(skops))
 365                /* The kernel is calculating the MSS */
 366                return current_mss_opt_len(skops);
 367
 368        if (skops->skb_len)
 369                return data_opt_len(skops);
 370
 371        return nodata_opt_len(skops);
 372}
 373
 374static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
 375{
 376        __u8 tcp_flags = skops_tcp_flags(skops);
 377        struct tcphdr *th;
 378
 379        if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
 380                return write_synack_opt(skops);
 381
 382        if (tcp_flags & TCPHDR_SYN)
 383                return write_syn_opt(skops);
 384
 385        if (tcp_flags & TCPHDR_FIN)
 386                return write_fin_opt(skops);
 387
 388        th = skops->skb_data;
 389        if (th + 1 > skops->skb_data_end)
 390                RET_CG_ERR(0);
 391
 392        if (skops->skb_len > tcp_hdrlen(th))
 393                return write_data_opt(skops);
 394
 395        return write_nodata_opt(skops);
 396}
 397
 398static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
 399{
 400        __u32 max_delack_us = max_delack_ms * 1000;
 401
 402        return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
 403                              &max_delack_us, sizeof(max_delack_us));
 404}
 405
 406static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
 407{
 408        __u32 min_rto_us = peer_max_delack_ms * 1000;
 409
 410        return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
 411                              sizeof(min_rto_us));
 412}
 413
 414static int handle_active_estab(struct bpf_sock_ops *skops)
 415{
 416        struct hdr_stg init_stg = {
 417                .active = true,
 418        };
 419        int err;
 420
 421        err = load_option(skops, &active_estab_in, false);
 422        if (err && err != -ENOMSG)
 423                RET_CG_ERR(err);
 424
 425        init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
 426                                                OPTION_RESEND);
 427        if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
 428                                              &init_stg,
 429                                              BPF_SK_STORAGE_GET_F_CREATE))
 430                RET_CG_ERR(0);
 431
 432        if (init_stg.resend_syn)
 433                /* Don't clear the write_hdr cb now because
 434                 * the ACK may get lost and retransmit may
 435                 * be needed.
 436                 *
 437                 * PARSE_ALL_HDR cb flag is set to learn if this
 438                 * resend_syn option has received by the peer.
 439                 *
 440                 * The header option will be resent until a valid
 441                 * packet is received at handle_parse_hdr()
 442                 * and all hdr cb flags will be cleared in
 443                 * handle_parse_hdr().
 444                 */
 445                set_parse_all_hdr_cb_flags(skops);
 446        else if (!active_fin_out.flags)
 447                /* No options will be written from now */
 448                clear_hdr_cb_flags(skops);
 449
 450        if (active_syn_out.max_delack_ms) {
 451                err = set_delack_max(skops, active_syn_out.max_delack_ms);
 452                if (err)
 453                        RET_CG_ERR(err);
 454        }
 455
 456        if (active_estab_in.max_delack_ms) {
 457                err = set_rto_min(skops, active_estab_in.max_delack_ms);
 458                if (err)
 459                        RET_CG_ERR(err);
 460        }
 461
 462        return CG_OK;
 463}
 464
 465static int handle_passive_estab(struct bpf_sock_ops *skops)
 466{
 467        struct hdr_stg init_stg = {};
 468        struct tcphdr *th;
 469        int err;
 470
 471        inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
 472
 473        err = load_option(skops, &passive_estab_in, true);
 474        if (err == -ENOENT) {
 475                /* saved_syn is not found. It was in syncookie mode.
 476                 * We have asked the active side to resend the options
 477                 * in ACK, so try to find the bpf_test_option from ACK now.
 478                 */
 479                err = load_option(skops, &passive_estab_in, false);
 480                init_stg.syncookie = true;
 481        }
 482
 483        /* ENOMSG: The bpf_test_option is not found which is fine.
 484         * Bail out now for all other errors.
 485         */
 486        if (err && err != -ENOMSG)
 487                RET_CG_ERR(err);
 488
 489        th = skops->skb_data;
 490        if (th + 1 > skops->skb_data_end)
 491                RET_CG_ERR(0);
 492
 493        if (th->syn) {
 494                /* Fastopen */
 495
 496                /* Cannot clear cb_flags to stop write_hdr cb.
 497                 * synack is not sent yet for fast open.
 498                 * Even it was, the synack may need to be retransmitted.
 499                 *
 500                 * PARSE_ALL_HDR cb flag is set to learn
 501                 * if synack has reached the peer.
 502                 * All cb_flags will be cleared in handle_parse_hdr().
 503                 */
 504                set_parse_all_hdr_cb_flags(skops);
 505                init_stg.fastopen = true;
 506        } else if (!passive_fin_out.flags) {
 507                /* No options will be written from now */
 508                clear_hdr_cb_flags(skops);
 509        }
 510
 511        if (!skops->sk ||
 512            !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
 513                                BPF_SK_STORAGE_GET_F_CREATE))
 514                RET_CG_ERR(0);
 515
 516        if (passive_synack_out.max_delack_ms) {
 517                err = set_delack_max(skops, passive_synack_out.max_delack_ms);
 518                if (err)
 519                        RET_CG_ERR(err);
 520        }
 521
 522        if (passive_estab_in.max_delack_ms) {
 523                err = set_rto_min(skops, passive_estab_in.max_delack_ms);
 524                if (err)
 525                        RET_CG_ERR(err);
 526        }
 527
 528        return CG_OK;
 529}
 530
 531static int handle_parse_hdr(struct bpf_sock_ops *skops)
 532{
 533        struct hdr_stg *hdr_stg;
 534        struct tcphdr *th;
 535
 536        if (!skops->sk)
 537                RET_CG_ERR(0);
 538
 539        th = skops->skb_data;
 540        if (th + 1 > skops->skb_data_end)
 541                RET_CG_ERR(0);
 542
 543        hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
 544        if (!hdr_stg)
 545                RET_CG_ERR(0);
 546
 547        if (hdr_stg->resend_syn || hdr_stg->fastopen)
 548                /* The PARSE_ALL_HDR cb flag was turned on
 549                 * to ensure that the previously written
 550                 * options have reached the peer.
 551                 * Those previously written option includes:
 552                 *     - Active side: resend_syn in ACK during syncookie
 553                 *      or
 554                 *     - Passive side: SYNACK during fastopen
 555                 *
 556                 * A valid packet has been received here after
 557                 * the 3WHS, so the PARSE_ALL_HDR cb flag
 558                 * can be cleared now.
 559                 */
 560                clear_parse_all_hdr_cb_flags(skops);
 561
 562        if (hdr_stg->resend_syn && !active_fin_out.flags)
 563                /* Active side resent the syn option in ACK
 564                 * because the server was in syncookie mode.
 565                 * A valid packet has been received, so
 566                 * clear header cb flags if there is no
 567                 * more option to send.
 568                 */
 569                clear_hdr_cb_flags(skops);
 570
 571        if (hdr_stg->fastopen && !passive_fin_out.flags)
 572                /* Passive side was in fastopen.
 573                 * A valid packet has been received, so
 574                 * the SYNACK has reached the peer.
 575                 * Clear header cb flags if there is no more
 576                 * option to send.
 577                 */
 578                clear_hdr_cb_flags(skops);
 579
 580        if (th->fin) {
 581                struct bpf_test_option *fin_opt;
 582                int err;
 583
 584                if (hdr_stg->active)
 585                        fin_opt = &active_fin_in;
 586                else
 587                        fin_opt = &passive_fin_in;
 588
 589                err = load_option(skops, fin_opt, false);
 590                if (err && err != -ENOMSG)
 591                        RET_CG_ERR(err);
 592        }
 593
 594        return CG_OK;
 595}
 596
 597SEC("sockops")
 598int estab(struct bpf_sock_ops *skops)
 599{
 600        int true_val = 1;
 601
 602        switch (skops->op) {
 603        case BPF_SOCK_OPS_TCP_LISTEN_CB:
 604                bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
 605                               &true_val, sizeof(true_val));
 606                set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
 607                break;
 608        case BPF_SOCK_OPS_TCP_CONNECT_CB:
 609                set_hdr_cb_flags(skops, 0);
 610                break;
 611        case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
 612                return handle_parse_hdr(skops);
 613        case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
 614                return handle_hdr_opt_len(skops);
 615        case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
 616                return handle_write_hdr_opt(skops);
 617        case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
 618                return handle_passive_estab(skops);
 619        case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 620                return handle_active_estab(skops);
 621        }
 622
 623        return CG_OK;
 624}
 625
 626char _license[] SEC("license") = "GPL";
 627