linux/net/openvswitch/meter.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2017 Nicira, Inc.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include <linux/if.h>
   9#include <linux/skbuff.h>
  10#include <linux/ip.h>
  11#include <linux/kernel.h>
  12#include <linux/openvswitch.h>
  13#include <linux/netlink.h>
  14#include <linux/rculist.h>
  15
  16#include <net/netlink.h>
  17#include <net/genetlink.h>
  18
  19#include "datapath.h"
  20#include "meter.h"
  21
  22#define METER_HASH_BUCKETS 1024
  23
  24static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
  25        [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
  26        [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
  27        [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
  28        [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
  29        [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
  30        [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
  31        [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
  32        [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
  33};
  34
  35static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
  36        [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
  37        [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
  38        [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
  39        [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
  40};
  41
  42static void ovs_meter_free(struct dp_meter *meter)
  43{
  44        if (!meter)
  45                return;
  46
  47        kfree_rcu(meter, rcu);
  48}
  49
  50static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
  51                                            u32 meter_id)
  52{
  53        return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
  54}
  55
  56/* Call with ovs_mutex or RCU read lock. */
  57static struct dp_meter *lookup_meter(const struct datapath *dp,
  58                                     u32 meter_id)
  59{
  60        struct dp_meter *meter;
  61        struct hlist_head *head;
  62
  63        head = meter_hash_bucket(dp, meter_id);
  64        hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
  65                if (meter->id == meter_id)
  66                        return meter;
  67        }
  68        return NULL;
  69}
  70
  71static void attach_meter(struct datapath *dp, struct dp_meter *meter)
  72{
  73        struct hlist_head *head = meter_hash_bucket(dp, meter->id);
  74
  75        hlist_add_head_rcu(&meter->dp_hash_node, head);
  76}
  77
  78static void detach_meter(struct dp_meter *meter)
  79{
  80        ASSERT_OVSL();
  81        if (meter)
  82                hlist_del_rcu(&meter->dp_hash_node);
  83}
  84
  85static struct sk_buff *
  86ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
  87                          struct ovs_header **ovs_reply_header)
  88{
  89        struct sk_buff *skb;
  90        struct ovs_header *ovs_header = info->userhdr;
  91
  92        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  93        if (!skb)
  94                return ERR_PTR(-ENOMEM);
  95
  96        *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
  97                                        info->snd_seq,
  98                                        &dp_meter_genl_family, 0, cmd);
  99        if (!*ovs_reply_header) {
 100                nlmsg_free(skb);
 101                return ERR_PTR(-EMSGSIZE);
 102        }
 103        (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
 104
 105        return skb;
 106}
 107
 108static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 109                                     struct dp_meter *meter)
 110{
 111        struct nlattr *nla;
 112        struct dp_meter_band *band;
 113        u16 i;
 114
 115        if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 116                goto error;
 117
 118        if (!meter)
 119                return 0;
 120
 121        if (nla_put(reply, OVS_METER_ATTR_STATS,
 122                    sizeof(struct ovs_flow_stats), &meter->stats) ||
 123            nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 124                              OVS_METER_ATTR_PAD))
 125                goto error;
 126
 127        nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
 128        if (!nla)
 129                goto error;
 130
 131        band = meter->bands;
 132
 133        for (i = 0; i < meter->n_bands; ++i, ++band) {
 134                struct nlattr *band_nla;
 135
 136                band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
 137                if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
 138                                         sizeof(struct ovs_flow_stats),
 139                                         &band->stats))
 140                        goto error;
 141                nla_nest_end(reply, band_nla);
 142        }
 143        nla_nest_end(reply, nla);
 144
 145        return 0;
 146error:
 147        return -EMSGSIZE;
 148}
 149
 150static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 151{
 152        struct sk_buff *reply;
 153        struct ovs_header *ovs_reply_header;
 154        struct nlattr *nla, *band_nla;
 155        int err;
 156
 157        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
 158                                          &ovs_reply_header);
 159        if (IS_ERR(reply))
 160                return PTR_ERR(reply);
 161
 162        if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
 163            nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 164                goto nla_put_failure;
 165
 166        nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
 167        if (!nla)
 168                goto nla_put_failure;
 169
 170        band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
 171        if (!band_nla)
 172                goto nla_put_failure;
 173        /* Currently only DROP band type is supported. */
 174        if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
 175                goto nla_put_failure;
 176        nla_nest_end(reply, band_nla);
 177        nla_nest_end(reply, nla);
 178
 179        genlmsg_end(reply, ovs_reply_header);
 180        return genlmsg_reply(reply, info);
 181
 182nla_put_failure:
 183        nlmsg_free(reply);
 184        err = -EMSGSIZE;
 185        return err;
 186}
 187
 188static struct dp_meter *dp_meter_create(struct nlattr **a)
 189{
 190        struct nlattr *nla;
 191        int rem;
 192        u16 n_bands = 0;
 193        struct dp_meter *meter;
 194        struct dp_meter_band *band;
 195        int err;
 196
 197        /* Validate attributes, count the bands. */
 198        if (!a[OVS_METER_ATTR_BANDS])
 199                return ERR_PTR(-EINVAL);
 200
 201        nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
 202                if (++n_bands > DP_MAX_BANDS)
 203                        return ERR_PTR(-EINVAL);
 204
 205        /* Allocate and set up the meter before locking anything. */
 206        meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
 207        if (!meter)
 208                return ERR_PTR(-ENOMEM);
 209
 210        meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 211        meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
 212        meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
 213        meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
 214        spin_lock_init(&meter->lock);
 215        if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
 216                meter->stats = *(struct ovs_flow_stats *)
 217                        nla_data(a[OVS_METER_ATTR_STATS]);
 218        }
 219        meter->n_bands = n_bands;
 220
 221        /* Set up meter bands. */
 222        band = meter->bands;
 223        nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
 224                struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
 225                u32 band_max_delta_t;
 226
 227                err = nla_parse_deprecated((struct nlattr **)&attr,
 228                                           OVS_BAND_ATTR_MAX, nla_data(nla),
 229                                           nla_len(nla), band_policy, NULL);
 230                if (err)
 231                        goto exit_free_meter;
 232
 233                if (!attr[OVS_BAND_ATTR_TYPE] ||
 234                    !attr[OVS_BAND_ATTR_RATE] ||
 235                    !attr[OVS_BAND_ATTR_BURST]) {
 236                        err = -EINVAL;
 237                        goto exit_free_meter;
 238                }
 239
 240                band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
 241                band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
 242                if (band->rate == 0) {
 243                        err = -EINVAL;
 244                        goto exit_free_meter;
 245                }
 246
 247                band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
 248                /* Figure out max delta_t that is enough to fill any bucket.
 249                 * Keep max_delta_t size to the bucket units:
 250                 * pkts => 1/1000 packets, kilobits => bits.
 251                 *
 252                 * Start with a full bucket.
 253                 */
 254                band->bucket = (band->burst_size + band->rate) * 1000;
 255                band_max_delta_t = band->bucket / band->rate;
 256                if (band_max_delta_t > meter->max_delta_t)
 257                        meter->max_delta_t = band_max_delta_t;
 258                band++;
 259        }
 260
 261        return meter;
 262
 263exit_free_meter:
 264        kfree(meter);
 265        return ERR_PTR(err);
 266}
 267
 268static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 269{
 270        struct nlattr **a = info->attrs;
 271        struct dp_meter *meter, *old_meter;
 272        struct sk_buff *reply;
 273        struct ovs_header *ovs_reply_header;
 274        struct ovs_header *ovs_header = info->userhdr;
 275        struct datapath *dp;
 276        int err;
 277        u32 meter_id;
 278        bool failed;
 279
 280        if (!a[OVS_METER_ATTR_ID]) {
 281                return -ENODEV;
 282        }
 283
 284        meter = dp_meter_create(a);
 285        if (IS_ERR_OR_NULL(meter))
 286                return PTR_ERR(meter);
 287
 288        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
 289                                          &ovs_reply_header);
 290        if (IS_ERR(reply)) {
 291                err = PTR_ERR(reply);
 292                goto exit_free_meter;
 293        }
 294
 295        ovs_lock();
 296        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 297        if (!dp) {
 298                err = -ENODEV;
 299                goto exit_unlock;
 300        }
 301
 302        meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 303
 304        /* Cannot fail after this. */
 305        old_meter = lookup_meter(dp, meter_id);
 306        detach_meter(old_meter);
 307        attach_meter(dp, meter);
 308        ovs_unlock();
 309
 310        /* Build response with the meter_id and stats from
 311         * the old meter, if any.
 312         */
 313        failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
 314        WARN_ON(failed);
 315        if (old_meter) {
 316                spin_lock_bh(&old_meter->lock);
 317                if (old_meter->keep_stats) {
 318                        err = ovs_meter_cmd_reply_stats(reply, meter_id,
 319                                                        old_meter);
 320                        WARN_ON(err);
 321                }
 322                spin_unlock_bh(&old_meter->lock);
 323                ovs_meter_free(old_meter);
 324        }
 325
 326        genlmsg_end(reply, ovs_reply_header);
 327        return genlmsg_reply(reply, info);
 328
 329exit_unlock:
 330        ovs_unlock();
 331        nlmsg_free(reply);
 332exit_free_meter:
 333        kfree(meter);
 334        return err;
 335}
 336
 337static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 338{
 339        struct nlattr **a = info->attrs;
 340        u32 meter_id;
 341        struct ovs_header *ovs_header = info->userhdr;
 342        struct ovs_header *ovs_reply_header;
 343        struct datapath *dp;
 344        int err;
 345        struct sk_buff *reply;
 346        struct dp_meter *meter;
 347
 348        if (!a[OVS_METER_ATTR_ID])
 349                return -EINVAL;
 350
 351        meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 352
 353        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
 354                                          &ovs_reply_header);
 355        if (IS_ERR(reply))
 356                return PTR_ERR(reply);
 357
 358        ovs_lock();
 359
 360        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 361        if (!dp) {
 362                err = -ENODEV;
 363                goto exit_unlock;
 364        }
 365
 366        /* Locate meter, copy stats. */
 367        meter = lookup_meter(dp, meter_id);
 368        if (!meter) {
 369                err = -ENOENT;
 370                goto exit_unlock;
 371        }
 372
 373        spin_lock_bh(&meter->lock);
 374        err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
 375        spin_unlock_bh(&meter->lock);
 376        if (err)
 377                goto exit_unlock;
 378
 379        ovs_unlock();
 380
 381        genlmsg_end(reply, ovs_reply_header);
 382        return genlmsg_reply(reply, info);
 383
 384exit_unlock:
 385        ovs_unlock();
 386        nlmsg_free(reply);
 387        return err;
 388}
 389
 390static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 391{
 392        struct nlattr **a = info->attrs;
 393        u32 meter_id;
 394        struct ovs_header *ovs_header = info->userhdr;
 395        struct ovs_header *ovs_reply_header;
 396        struct datapath *dp;
 397        int err;
 398        struct sk_buff *reply;
 399        struct dp_meter *old_meter;
 400
 401        if (!a[OVS_METER_ATTR_ID])
 402                return -EINVAL;
 403        meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 404
 405        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
 406                                          &ovs_reply_header);
 407        if (IS_ERR(reply))
 408                return PTR_ERR(reply);
 409
 410        ovs_lock();
 411
 412        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 413        if (!dp) {
 414                err = -ENODEV;
 415                goto exit_unlock;
 416        }
 417
 418        old_meter = lookup_meter(dp, meter_id);
 419        if (old_meter) {
 420                spin_lock_bh(&old_meter->lock);
 421                err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 422                WARN_ON(err);
 423                spin_unlock_bh(&old_meter->lock);
 424                detach_meter(old_meter);
 425        }
 426        ovs_unlock();
 427        ovs_meter_free(old_meter);
 428        genlmsg_end(reply, ovs_reply_header);
 429        return genlmsg_reply(reply, info);
 430
 431exit_unlock:
 432        ovs_unlock();
 433        nlmsg_free(reply);
 434        return err;
 435}
 436
 437/* Meter action execution.
 438 *
 439 * Return true 'meter_id' drop band is triggered. The 'skb' should be
 440 * dropped by the caller'.
 441 */
 442bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 443                       struct sw_flow_key *key, u32 meter_id)
 444{
 445        struct dp_meter *meter;
 446        struct dp_meter_band *band;
 447        long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
 448        long long int long_delta_ms;
 449        u32 delta_ms;
 450        u32 cost;
 451        int i, band_exceeded_max = -1;
 452        u32 band_exceeded_rate = 0;
 453
 454        meter = lookup_meter(dp, meter_id);
 455        /* Do not drop the packet when there is no meter. */
 456        if (!meter)
 457                return false;
 458
 459        /* Lock the meter while using it. */
 460        spin_lock(&meter->lock);
 461
 462        long_delta_ms = (now_ms - meter->used); /* ms */
 463
 464        /* Make sure delta_ms will not be too large, so that bucket will not
 465         * wrap around below.
 466         */
 467        delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
 468                   ? meter->max_delta_t : (u32)long_delta_ms;
 469
 470        /* Update meter statistics.
 471         */
 472        meter->used = now_ms;
 473        meter->stats.n_packets += 1;
 474        meter->stats.n_bytes += skb->len;
 475
 476        /* Bucket rate is either in kilobits per second, or in packets per
 477         * second.  We maintain the bucket in the units of either bits or
 478         * 1/1000th of a packet, correspondingly.
 479         * Then, when rate is multiplied with milliseconds, we get the
 480         * bucket units:
 481         * msec * kbps = bits, and
 482         * msec * packets/sec = 1/1000 packets.
 483         *
 484         * 'cost' is the number of bucket units in this packet.
 485         */
 486        cost = (meter->kbps) ? skb->len * 8 : 1000;
 487
 488        /* Update all bands and find the one hit with the highest rate. */
 489        for (i = 0; i < meter->n_bands; ++i) {
 490                long long int max_bucket_size;
 491
 492                band = &meter->bands[i];
 493                max_bucket_size = (band->burst_size + band->rate) * 1000LL;
 494
 495                band->bucket += delta_ms * band->rate;
 496                if (band->bucket > max_bucket_size)
 497                        band->bucket = max_bucket_size;
 498
 499                if (band->bucket >= cost) {
 500                        band->bucket -= cost;
 501                } else if (band->rate > band_exceeded_rate) {
 502                        band_exceeded_rate = band->rate;
 503                        band_exceeded_max = i;
 504                }
 505        }
 506
 507        if (band_exceeded_max >= 0) {
 508                /* Update band statistics. */
 509                band = &meter->bands[band_exceeded_max];
 510                band->stats.n_packets += 1;
 511                band->stats.n_bytes += skb->len;
 512
 513                /* Drop band triggered, let the caller drop the 'skb'.  */
 514                if (band->type == OVS_METER_BAND_TYPE_DROP) {
 515                        spin_unlock(&meter->lock);
 516                        return true;
 517                }
 518        }
 519
 520        spin_unlock(&meter->lock);
 521        return false;
 522}
 523
 524static struct genl_ops dp_meter_genl_ops[] = {
 525        { .cmd = OVS_METER_CMD_FEATURES,
 526                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 527                .flags = 0,               /* OK for unprivileged users. */
 528                .doit = ovs_meter_cmd_features
 529        },
 530        { .cmd = OVS_METER_CMD_SET,
 531                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 532                .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 533                                           *  privilege.
 534                                           */
 535                .doit = ovs_meter_cmd_set,
 536        },
 537        { .cmd = OVS_METER_CMD_GET,
 538                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 539                .flags = 0,               /* OK for unprivileged users. */
 540                .doit = ovs_meter_cmd_get,
 541        },
 542        { .cmd = OVS_METER_CMD_DEL,
 543                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 544                .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 545                                           *  privilege.
 546                                           */
 547                .doit = ovs_meter_cmd_del
 548        },
 549};
 550
 551static const struct genl_multicast_group ovs_meter_multicast_group = {
 552        .name = OVS_METER_MCGROUP,
 553};
 554
 555struct genl_family dp_meter_genl_family __ro_after_init = {
 556        .hdrsize = sizeof(struct ovs_header),
 557        .name = OVS_METER_FAMILY,
 558        .version = OVS_METER_VERSION,
 559        .maxattr = OVS_METER_ATTR_MAX,
 560        .policy = meter_policy,
 561        .netnsok = true,
 562        .parallel_ops = true,
 563        .ops = dp_meter_genl_ops,
 564        .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
 565        .mcgrps = &ovs_meter_multicast_group,
 566        .n_mcgrps = 1,
 567        .module = THIS_MODULE,
 568};
 569
 570int ovs_meters_init(struct datapath *dp)
 571{
 572        int i;
 573
 574        dp->meters = kmalloc_array(METER_HASH_BUCKETS,
 575                                   sizeof(struct hlist_head), GFP_KERNEL);
 576
 577        if (!dp->meters)
 578                return -ENOMEM;
 579
 580        for (i = 0; i < METER_HASH_BUCKETS; i++)
 581                INIT_HLIST_HEAD(&dp->meters[i]);
 582
 583        return 0;
 584}
 585
 586void ovs_meters_exit(struct datapath *dp)
 587{
 588        int i;
 589
 590        for (i = 0; i < METER_HASH_BUCKETS; i++) {
 591                struct hlist_head *head = &dp->meters[i];
 592                struct dp_meter *meter;
 593                struct hlist_node *n;
 594
 595                hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
 596                        kfree(meter);
 597        }
 598
 599        kfree(dp->meters);
 600}
 601