linux/net/psample/psample.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * net/psample/psample.c - Netlink channel for packet sampling
   4 * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
   5 */
   6
   7#include <linux/types.h>
   8#include <linux/kernel.h>
   9#include <linux/skbuff.h>
  10#include <linux/module.h>
  11#include <linux/timekeeping.h>
  12#include <net/net_namespace.h>
  13#include <net/sock.h>
  14#include <net/netlink.h>
  15#include <net/genetlink.h>
  16#include <net/psample.h>
  17#include <linux/spinlock.h>
  18#include <net/ip_tunnels.h>
  19#include <net/dst_metadata.h>
  20
  21#define PSAMPLE_MAX_PACKET_SIZE 0xffff
  22
  23static LIST_HEAD(psample_groups_list);
  24static DEFINE_SPINLOCK(psample_groups_lock);
  25
  26/* multicast groups */
  27enum psample_nl_multicast_groups {
  28        PSAMPLE_NL_MCGRP_CONFIG,
  29        PSAMPLE_NL_MCGRP_SAMPLE,
  30};
  31
  32static const struct genl_multicast_group psample_nl_mcgrps[] = {
  33        [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
  34        [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
  35};
  36
  37static struct genl_family psample_nl_family __ro_after_init;
  38
  39static int psample_group_nl_fill(struct sk_buff *msg,
  40                                 struct psample_group *group,
  41                                 enum psample_command cmd, u32 portid, u32 seq,
  42                                 int flags)
  43{
  44        void *hdr;
  45        int ret;
  46
  47        hdr = genlmsg_put(msg, portid, seq, &psample_nl_family, flags, cmd);
  48        if (!hdr)
  49                return -EMSGSIZE;
  50
  51        ret = nla_put_u32(msg, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
  52        if (ret < 0)
  53                goto error;
  54
  55        ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_REFCOUNT, group->refcount);
  56        if (ret < 0)
  57                goto error;
  58
  59        ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_SEQ, group->seq);
  60        if (ret < 0)
  61                goto error;
  62
  63        genlmsg_end(msg, hdr);
  64        return 0;
  65
  66error:
  67        genlmsg_cancel(msg, hdr);
  68        return -EMSGSIZE;
  69}
  70
  71static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
  72                                           struct netlink_callback *cb)
  73{
  74        struct psample_group *group;
  75        int start = cb->args[0];
  76        int idx = 0;
  77        int err;
  78
  79        spin_lock_bh(&psample_groups_lock);
  80        list_for_each_entry(group, &psample_groups_list, list) {
  81                if (!net_eq(group->net, sock_net(msg->sk)))
  82                        continue;
  83                if (idx < start) {
  84                        idx++;
  85                        continue;
  86                }
  87                err = psample_group_nl_fill(msg, group, PSAMPLE_CMD_NEW_GROUP,
  88                                            NETLINK_CB(cb->skb).portid,
  89                                            cb->nlh->nlmsg_seq, NLM_F_MULTI);
  90                if (err)
  91                        break;
  92                idx++;
  93        }
  94
  95        spin_unlock_bh(&psample_groups_lock);
  96        cb->args[0] = idx;
  97        return msg->len;
  98}
  99
 100static const struct genl_small_ops psample_nl_ops[] = {
 101        {
 102                .cmd = PSAMPLE_CMD_GET_GROUP,
 103                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 104                .dumpit = psample_nl_cmd_get_group_dumpit,
 105                /* can be retrieved by unprivileged users */
 106        }
 107};
 108
 109static struct genl_family psample_nl_family __ro_after_init = {
 110        .name           = PSAMPLE_GENL_NAME,
 111        .version        = PSAMPLE_GENL_VERSION,
 112        .maxattr        = PSAMPLE_ATTR_MAX,
 113        .netnsok        = true,
 114        .module         = THIS_MODULE,
 115        .mcgrps         = psample_nl_mcgrps,
 116        .small_ops      = psample_nl_ops,
 117        .n_small_ops    = ARRAY_SIZE(psample_nl_ops),
 118        .n_mcgrps       = ARRAY_SIZE(psample_nl_mcgrps),
 119};
 120
 121static void psample_group_notify(struct psample_group *group,
 122                                 enum psample_command cmd)
 123{
 124        struct sk_buff *msg;
 125        int err;
 126
 127        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 128        if (!msg)
 129                return;
 130
 131        err = psample_group_nl_fill(msg, group, cmd, 0, 0, NLM_F_MULTI);
 132        if (!err)
 133                genlmsg_multicast_netns(&psample_nl_family, group->net, msg, 0,
 134                                        PSAMPLE_NL_MCGRP_CONFIG, GFP_ATOMIC);
 135        else
 136                nlmsg_free(msg);
 137}
 138
 139static struct psample_group *psample_group_create(struct net *net,
 140                                                  u32 group_num)
 141{
 142        struct psample_group *group;
 143
 144        group = kzalloc(sizeof(*group), GFP_ATOMIC);
 145        if (!group)
 146                return NULL;
 147
 148        group->net = net;
 149        group->group_num = group_num;
 150        list_add_tail(&group->list, &psample_groups_list);
 151
 152        psample_group_notify(group, PSAMPLE_CMD_NEW_GROUP);
 153        return group;
 154}
 155
 156static void psample_group_destroy(struct psample_group *group)
 157{
 158        psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
 159        list_del(&group->list);
 160        kfree_rcu(group, rcu);
 161}
 162
 163static struct psample_group *
 164psample_group_lookup(struct net *net, u32 group_num)
 165{
 166        struct psample_group *group;
 167
 168        list_for_each_entry(group, &psample_groups_list, list)
 169                if ((group->group_num == group_num) && (group->net == net))
 170                        return group;
 171        return NULL;
 172}
 173
 174struct psample_group *psample_group_get(struct net *net, u32 group_num)
 175{
 176        struct psample_group *group;
 177
 178        spin_lock_bh(&psample_groups_lock);
 179
 180        group = psample_group_lookup(net, group_num);
 181        if (!group) {
 182                group = psample_group_create(net, group_num);
 183                if (!group)
 184                        goto out;
 185        }
 186        group->refcount++;
 187
 188out:
 189        spin_unlock_bh(&psample_groups_lock);
 190        return group;
 191}
 192EXPORT_SYMBOL_GPL(psample_group_get);
 193
 194void psample_group_take(struct psample_group *group)
 195{
 196        spin_lock_bh(&psample_groups_lock);
 197        group->refcount++;
 198        spin_unlock_bh(&psample_groups_lock);
 199}
 200EXPORT_SYMBOL_GPL(psample_group_take);
 201
 202void psample_group_put(struct psample_group *group)
 203{
 204        spin_lock_bh(&psample_groups_lock);
 205
 206        if (--group->refcount == 0)
 207                psample_group_destroy(group);
 208
 209        spin_unlock_bh(&psample_groups_lock);
 210}
 211EXPORT_SYMBOL_GPL(psample_group_put);
 212
 213#ifdef CONFIG_INET
 214static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
 215                              struct ip_tunnel_info *tun_info)
 216{
 217        unsigned short tun_proto = ip_tunnel_info_af(tun_info);
 218        const void *tun_opts = ip_tunnel_info_opts(tun_info);
 219        const struct ip_tunnel_key *tun_key = &tun_info->key;
 220        int tun_opts_len = tun_info->options_len;
 221
 222        if (tun_key->tun_flags & TUNNEL_KEY &&
 223            nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
 224                         PSAMPLE_TUNNEL_KEY_ATTR_PAD))
 225                return -EMSGSIZE;
 226
 227        if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE &&
 228            nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE))
 229                return -EMSGSIZE;
 230
 231        switch (tun_proto) {
 232        case AF_INET:
 233                if (tun_key->u.ipv4.src &&
 234                    nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC,
 235                                    tun_key->u.ipv4.src))
 236                        return -EMSGSIZE;
 237                if (tun_key->u.ipv4.dst &&
 238                    nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST,
 239                                    tun_key->u.ipv4.dst))
 240                        return -EMSGSIZE;
 241                break;
 242        case AF_INET6:
 243                if (!ipv6_addr_any(&tun_key->u.ipv6.src) &&
 244                    nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC,
 245                                     &tun_key->u.ipv6.src))
 246                        return -EMSGSIZE;
 247                if (!ipv6_addr_any(&tun_key->u.ipv6.dst) &&
 248                    nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST,
 249                                     &tun_key->u.ipv6.dst))
 250                        return -EMSGSIZE;
 251                break;
 252        }
 253        if (tun_key->tos &&
 254            nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos))
 255                return -EMSGSIZE;
 256        if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
 257                return -EMSGSIZE;
 258        if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
 259            nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 260                return -EMSGSIZE;
 261        if ((tun_key->tun_flags & TUNNEL_CSUM) &&
 262            nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
 263                return -EMSGSIZE;
 264        if (tun_key->tp_src &&
 265            nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src))
 266                return -EMSGSIZE;
 267        if (tun_key->tp_dst &&
 268            nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
 269                return -EMSGSIZE;
 270        if ((tun_key->tun_flags & TUNNEL_OAM) &&
 271            nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
 272                return -EMSGSIZE;
 273        if (tun_opts_len) {
 274                if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
 275                    nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
 276                            tun_opts_len, tun_opts))
 277                        return -EMSGSIZE;
 278                else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
 279                         nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
 280                                 tun_opts_len, tun_opts))
 281                        return -EMSGSIZE;
 282        }
 283
 284        return 0;
 285}
 286
 287static int psample_ip_tun_to_nlattr(struct sk_buff *skb,
 288                            struct ip_tunnel_info *tun_info)
 289{
 290        struct nlattr *nla;
 291        int err;
 292
 293        nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL);
 294        if (!nla)
 295                return -EMSGSIZE;
 296
 297        err = __psample_ip_tun_to_nlattr(skb, tun_info);
 298        if (err) {
 299                nla_nest_cancel(skb, nla);
 300                return err;
 301        }
 302
 303        nla_nest_end(skb, nla);
 304
 305        return 0;
 306}
 307
 308static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
 309{
 310        unsigned short tun_proto = ip_tunnel_info_af(tun_info);
 311        const struct ip_tunnel_key *tun_key = &tun_info->key;
 312        int tun_opts_len = tun_info->options_len;
 313        int sum = nla_total_size(0);    /* PSAMPLE_ATTR_TUNNEL */
 314
 315        if (tun_key->tun_flags & TUNNEL_KEY)
 316                sum += nla_total_size_64bit(sizeof(u64));
 317
 318        if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
 319                sum += nla_total_size(0);
 320
 321        switch (tun_proto) {
 322        case AF_INET:
 323                if (tun_key->u.ipv4.src)
 324                        sum += nla_total_size(sizeof(u32));
 325                if (tun_key->u.ipv4.dst)
 326                        sum += nla_total_size(sizeof(u32));
 327                break;
 328        case AF_INET6:
 329                if (!ipv6_addr_any(&tun_key->u.ipv6.src))
 330                        sum += nla_total_size(sizeof(struct in6_addr));
 331                if (!ipv6_addr_any(&tun_key->u.ipv6.dst))
 332                        sum += nla_total_size(sizeof(struct in6_addr));
 333                break;
 334        }
 335        if (tun_key->tos)
 336                sum += nla_total_size(sizeof(u8));
 337        sum += nla_total_size(sizeof(u8));      /* TTL */
 338        if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
 339                sum += nla_total_size(0);
 340        if (tun_key->tun_flags & TUNNEL_CSUM)
 341                sum += nla_total_size(0);
 342        if (tun_key->tp_src)
 343                sum += nla_total_size(sizeof(u16));
 344        if (tun_key->tp_dst)
 345                sum += nla_total_size(sizeof(u16));
 346        if (tun_key->tun_flags & TUNNEL_OAM)
 347                sum += nla_total_size(0);
 348        if (tun_opts_len) {
 349                if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
 350                        sum += nla_total_size(tun_opts_len);
 351                else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
 352                        sum += nla_total_size(tun_opts_len);
 353        }
 354
 355        return sum;
 356}
 357#endif
 358
 359void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 360                           u32 sample_rate, const struct psample_metadata *md)
 361{
 362        ktime_t tstamp = ktime_get_real();
 363        int out_ifindex = md->out_ifindex;
 364        int in_ifindex = md->in_ifindex;
 365        u32 trunc_size = md->trunc_size;
 366#ifdef CONFIG_INET
 367        struct ip_tunnel_info *tun_info;
 368#endif
 369        struct sk_buff *nl_skb;
 370        int data_len;
 371        int meta_len;
 372        void *data;
 373        int ret;
 374
 375        meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
 376                   (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
 377                   (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
 378                   (md->out_tc_occ_valid ? nla_total_size_64bit(sizeof(u64)) : 0) +
 379                   (md->latency_valid ? nla_total_size_64bit(sizeof(u64)) : 0) +
 380                   nla_total_size(sizeof(u32)) +        /* sample_rate */
 381                   nla_total_size(sizeof(u32)) +        /* orig_size */
 382                   nla_total_size(sizeof(u32)) +        /* group_num */
 383                   nla_total_size(sizeof(u32)) +        /* seq */
 384                   nla_total_size_64bit(sizeof(u64)) +  /* timestamp */
 385                   nla_total_size(sizeof(u16));         /* protocol */
 386
 387#ifdef CONFIG_INET
 388        tun_info = skb_tunnel_info(skb);
 389        if (tun_info)
 390                meta_len += psample_tunnel_meta_len(tun_info);
 391#endif
 392
 393        data_len = min(skb->len, trunc_size);
 394        if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
 395                data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
 396                            - NLA_ALIGNTO;
 397
 398        nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC);
 399        if (unlikely(!nl_skb))
 400                return;
 401
 402        data = genlmsg_put(nl_skb, 0, 0, &psample_nl_family, 0,
 403                           PSAMPLE_CMD_SAMPLE);
 404        if (unlikely(!data))
 405                goto error;
 406
 407        if (in_ifindex) {
 408                ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_IIFINDEX, in_ifindex);
 409                if (unlikely(ret < 0))
 410                        goto error;
 411        }
 412
 413        if (out_ifindex) {
 414                ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OIFINDEX, out_ifindex);
 415                if (unlikely(ret < 0))
 416                        goto error;
 417        }
 418
 419        ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_RATE, sample_rate);
 420        if (unlikely(ret < 0))
 421                goto error;
 422
 423        ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_ORIGSIZE, skb->len);
 424        if (unlikely(ret < 0))
 425                goto error;
 426
 427        ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
 428        if (unlikely(ret < 0))
 429                goto error;
 430
 431        ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_GROUP_SEQ, group->seq++);
 432        if (unlikely(ret < 0))
 433                goto error;
 434
 435        if (md->out_tc_valid) {
 436                ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OUT_TC, md->out_tc);
 437                if (unlikely(ret < 0))
 438                        goto error;
 439        }
 440
 441        if (md->out_tc_occ_valid) {
 442                ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_OUT_TC_OCC,
 443                                        md->out_tc_occ, PSAMPLE_ATTR_PAD);
 444                if (unlikely(ret < 0))
 445                        goto error;
 446        }
 447
 448        if (md->latency_valid) {
 449                ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_LATENCY,
 450                                        md->latency, PSAMPLE_ATTR_PAD);
 451                if (unlikely(ret < 0))
 452                        goto error;
 453        }
 454
 455        ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_TIMESTAMP,
 456                                ktime_to_ns(tstamp), PSAMPLE_ATTR_PAD);
 457        if (unlikely(ret < 0))
 458                goto error;
 459
 460        ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_PROTO,
 461                          be16_to_cpu(skb->protocol));
 462        if (unlikely(ret < 0))
 463                goto error;
 464
 465        if (data_len) {
 466                int nla_len = nla_total_size(data_len);
 467                struct nlattr *nla;
 468
 469                nla = skb_put(nl_skb, nla_len);
 470                nla->nla_type = PSAMPLE_ATTR_DATA;
 471                nla->nla_len = nla_attr_size(data_len);
 472
 473                if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
 474                        goto error;
 475        }
 476
 477#ifdef CONFIG_INET
 478        if (tun_info) {
 479                ret = psample_ip_tun_to_nlattr(nl_skb, tun_info);
 480                if (unlikely(ret < 0))
 481                        goto error;
 482        }
 483#endif
 484
 485        genlmsg_end(nl_skb, data);
 486        genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
 487                                PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
 488
 489        return;
 490error:
 491        pr_err_ratelimited("Could not create psample log message\n");
 492        nlmsg_free(nl_skb);
 493}
 494EXPORT_SYMBOL_GPL(psample_sample_packet);
 495
 496static int __init psample_module_init(void)
 497{
 498        return genl_register_family(&psample_nl_family);
 499}
 500
 501static void __exit psample_module_exit(void)
 502{
 503        genl_unregister_family(&psample_nl_family);
 504}
 505
 506module_init(psample_module_init);
 507module_exit(psample_module_exit);
 508
 509MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
 510MODULE_DESCRIPTION("netlink channel for packet sampling");
 511MODULE_LICENSE("GPL v2");
 512