linux/net/openvswitch/meter.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2017 Nicira, Inc.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include <linux/if.h>
   9#include <linux/skbuff.h>
  10#include <linux/ip.h>
  11#include <linux/kernel.h>
  12#include <linux/openvswitch.h>
  13#include <linux/netlink.h>
  14#include <linux/rculist.h>
  15
  16#include <net/netlink.h>
  17#include <net/genetlink.h>
  18
  19#include "datapath.h"
  20#include "meter.h"
  21
  22#define METER_HASH_BUCKETS 1024
  23
  24static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
  25        [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
  26        [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
  27        [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
  28        [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
  29        [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
  30        [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
  31        [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
  32        [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
  33};
  34
  35static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
  36        [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
  37        [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
  38        [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
  39        [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
  40};
  41
  42static void ovs_meter_free(struct dp_meter *meter)
  43{
  44        if (!meter)
  45                return;
  46
  47        kfree_rcu(meter, rcu);
  48}
  49
  50static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
  51                                            u32 meter_id)
  52{
  53        return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
  54}
  55
  56/* Call with ovs_mutex or RCU read lock. */
  57static struct dp_meter *lookup_meter(const struct datapath *dp,
  58                                     u32 meter_id)
  59{
  60        struct dp_meter *meter;
  61        struct hlist_head *head;
  62
  63        head = meter_hash_bucket(dp, meter_id);
  64        hlist_for_each_entry_rcu(meter, head, dp_hash_node,
  65                                lockdep_ovsl_is_held()) {
  66                if (meter->id == meter_id)
  67                        return meter;
  68        }
  69        return NULL;
  70}
  71
  72static void attach_meter(struct datapath *dp, struct dp_meter *meter)
  73{
  74        struct hlist_head *head = meter_hash_bucket(dp, meter->id);
  75
  76        hlist_add_head_rcu(&meter->dp_hash_node, head);
  77}
  78
  79static void detach_meter(struct dp_meter *meter)
  80{
  81        ASSERT_OVSL();
  82        if (meter)
  83                hlist_del_rcu(&meter->dp_hash_node);
  84}
  85
  86static struct sk_buff *
  87ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
  88                          struct ovs_header **ovs_reply_header)
  89{
  90        struct sk_buff *skb;
  91        struct ovs_header *ovs_header = info->userhdr;
  92
  93        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  94        if (!skb)
  95                return ERR_PTR(-ENOMEM);
  96
  97        *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
  98                                        info->snd_seq,
  99                                        &dp_meter_genl_family, 0, cmd);
 100        if (!*ovs_reply_header) {
 101                nlmsg_free(skb);
 102                return ERR_PTR(-EMSGSIZE);
 103        }
 104        (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
 105
 106        return skb;
 107}
 108
 109static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 110                                     struct dp_meter *meter)
 111{
 112        struct nlattr *nla;
 113        struct dp_meter_band *band;
 114        u16 i;
 115
 116        if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 117                goto error;
 118
 119        if (!meter)
 120                return 0;
 121
 122        if (nla_put(reply, OVS_METER_ATTR_STATS,
 123                    sizeof(struct ovs_flow_stats), &meter->stats) ||
 124            nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 125                              OVS_METER_ATTR_PAD))
 126                goto error;
 127
 128        nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
 129        if (!nla)
 130                goto error;
 131
 132        band = meter->bands;
 133
 134        for (i = 0; i < meter->n_bands; ++i, ++band) {
 135                struct nlattr *band_nla;
 136
 137                band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
 138                if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
 139                                         sizeof(struct ovs_flow_stats),
 140                                         &band->stats))
 141                        goto error;
 142                nla_nest_end(reply, band_nla);
 143        }
 144        nla_nest_end(reply, nla);
 145
 146        return 0;
 147error:
 148        return -EMSGSIZE;
 149}
 150
 151static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 152{
 153        struct sk_buff *reply;
 154        struct ovs_header *ovs_reply_header;
 155        struct nlattr *nla, *band_nla;
 156        int err;
 157
 158        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
 159                                          &ovs_reply_header);
 160        if (IS_ERR(reply))
 161                return PTR_ERR(reply);
 162
 163        if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
 164            nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 165                goto nla_put_failure;
 166
 167        nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
 168        if (!nla)
 169                goto nla_put_failure;
 170
 171        band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
 172        if (!band_nla)
 173                goto nla_put_failure;
 174        /* Currently only DROP band type is supported. */
 175        if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
 176                goto nla_put_failure;
 177        nla_nest_end(reply, band_nla);
 178        nla_nest_end(reply, nla);
 179
 180        genlmsg_end(reply, ovs_reply_header);
 181        return genlmsg_reply(reply, info);
 182
 183nla_put_failure:
 184        nlmsg_free(reply);
 185        err = -EMSGSIZE;
 186        return err;
 187}
 188
 189static struct dp_meter *dp_meter_create(struct nlattr **a)
 190{
 191        struct nlattr *nla;
 192        int rem;
 193        u16 n_bands = 0;
 194        struct dp_meter *meter;
 195        struct dp_meter_band *band;
 196        int err;
 197
 198        /* Validate attributes, count the bands. */
 199        if (!a[OVS_METER_ATTR_BANDS])
 200                return ERR_PTR(-EINVAL);
 201
 202        nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
 203                if (++n_bands > DP_MAX_BANDS)
 204                        return ERR_PTR(-EINVAL);
 205
 206        /* Allocate and set up the meter before locking anything. */
 207        meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
 208        if (!meter)
 209                return ERR_PTR(-ENOMEM);
 210
 211        meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 212        meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
 213        meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
 214        meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
 215        spin_lock_init(&meter->lock);
 216        if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
 217                meter->stats = *(struct ovs_flow_stats *)
 218                        nla_data(a[OVS_METER_ATTR_STATS]);
 219        }
 220        meter->n_bands = n_bands;
 221
 222        /* Set up meter bands. */
 223        band = meter->bands;
 224        nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
 225                struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
 226                u32 band_max_delta_t;
 227
 228                err = nla_parse_deprecated((struct nlattr **)&attr,
 229                                           OVS_BAND_ATTR_MAX, nla_data(nla),
 230                                           nla_len(nla), band_policy, NULL);
 231                if (err)
 232                        goto exit_free_meter;
 233
 234                if (!attr[OVS_BAND_ATTR_TYPE] ||
 235                    !attr[OVS_BAND_ATTR_RATE] ||
 236                    !attr[OVS_BAND_ATTR_BURST]) {
 237                        err = -EINVAL;
 238                        goto exit_free_meter;
 239                }
 240
 241                band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
 242                band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
 243                if (band->rate == 0) {
 244                        err = -EINVAL;
 245                        goto exit_free_meter;
 246                }
 247
 248                band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
 249                /* Figure out max delta_t that is enough to fill any bucket.
 250                 * Keep max_delta_t size to the bucket units:
 251                 * pkts => 1/1000 packets, kilobits => bits.
 252                 *
 253                 * Start with a full bucket.
 254                 */
 255                band->bucket = (band->burst_size + band->rate) * 1000;
 256                band_max_delta_t = band->bucket / band->rate;
 257                if (band_max_delta_t > meter->max_delta_t)
 258                        meter->max_delta_t = band_max_delta_t;
 259                band++;
 260        }
 261
 262        return meter;
 263
 264exit_free_meter:
 265        kfree(meter);
 266        return ERR_PTR(err);
 267}
 268
 269static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 270{
 271        struct nlattr **a = info->attrs;
 272        struct dp_meter *meter, *old_meter;
 273        struct sk_buff *reply;
 274        struct ovs_header *ovs_reply_header;
 275        struct ovs_header *ovs_header = info->userhdr;
 276        struct datapath *dp;
 277        int err;
 278        u32 meter_id;
 279        bool failed;
 280
 281        if (!a[OVS_METER_ATTR_ID]) {
 282                return -ENODEV;
 283        }
 284
 285        meter = dp_meter_create(a);
 286        if (IS_ERR_OR_NULL(meter))
 287                return PTR_ERR(meter);
 288
 289        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
 290                                          &ovs_reply_header);
 291        if (IS_ERR(reply)) {
 292                err = PTR_ERR(reply);
 293                goto exit_free_meter;
 294        }
 295
 296        ovs_lock();
 297        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 298        if (!dp) {
 299                err = -ENODEV;
 300                goto exit_unlock;
 301        }
 302
 303        meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 304
 305        /* Cannot fail after this. */
 306        old_meter = lookup_meter(dp, meter_id);
 307        detach_meter(old_meter);
 308        attach_meter(dp, meter);
 309        ovs_unlock();
 310
 311        /* Build response with the meter_id and stats from
 312         * the old meter, if any.
 313         */
 314        failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
 315        WARN_ON(failed);
 316        if (old_meter) {
 317                spin_lock_bh(&old_meter->lock);
 318                if (old_meter->keep_stats) {
 319                        err = ovs_meter_cmd_reply_stats(reply, meter_id,
 320                                                        old_meter);
 321                        WARN_ON(err);
 322                }
 323                spin_unlock_bh(&old_meter->lock);
 324                ovs_meter_free(old_meter);
 325        }
 326
 327        genlmsg_end(reply, ovs_reply_header);
 328        return genlmsg_reply(reply, info);
 329
 330exit_unlock:
 331        ovs_unlock();
 332        nlmsg_free(reply);
 333exit_free_meter:
 334        kfree(meter);
 335        return err;
 336}
 337
 338static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 339{
 340        struct nlattr **a = info->attrs;
 341        u32 meter_id;
 342        struct ovs_header *ovs_header = info->userhdr;
 343        struct ovs_header *ovs_reply_header;
 344        struct datapath *dp;
 345        int err;
 346        struct sk_buff *reply;
 347        struct dp_meter *meter;
 348
 349        if (!a[OVS_METER_ATTR_ID])
 350                return -EINVAL;
 351
 352        meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 353
 354        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
 355                                          &ovs_reply_header);
 356        if (IS_ERR(reply))
 357                return PTR_ERR(reply);
 358
 359        ovs_lock();
 360
 361        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 362        if (!dp) {
 363                err = -ENODEV;
 364                goto exit_unlock;
 365        }
 366
 367        /* Locate meter, copy stats. */
 368        meter = lookup_meter(dp, meter_id);
 369        if (!meter) {
 370                err = -ENOENT;
 371                goto exit_unlock;
 372        }
 373
 374        spin_lock_bh(&meter->lock);
 375        err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
 376        spin_unlock_bh(&meter->lock);
 377        if (err)
 378                goto exit_unlock;
 379
 380        ovs_unlock();
 381
 382        genlmsg_end(reply, ovs_reply_header);
 383        return genlmsg_reply(reply, info);
 384
 385exit_unlock:
 386        ovs_unlock();
 387        nlmsg_free(reply);
 388        return err;
 389}
 390
 391static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 392{
 393        struct nlattr **a = info->attrs;
 394        u32 meter_id;
 395        struct ovs_header *ovs_header = info->userhdr;
 396        struct ovs_header *ovs_reply_header;
 397        struct datapath *dp;
 398        int err;
 399        struct sk_buff *reply;
 400        struct dp_meter *old_meter;
 401
 402        if (!a[OVS_METER_ATTR_ID])
 403                return -EINVAL;
 404        meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 405
 406        reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
 407                                          &ovs_reply_header);
 408        if (IS_ERR(reply))
 409                return PTR_ERR(reply);
 410
 411        ovs_lock();
 412
 413        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 414        if (!dp) {
 415                err = -ENODEV;
 416                goto exit_unlock;
 417        }
 418
 419        old_meter = lookup_meter(dp, meter_id);
 420        if (old_meter) {
 421                spin_lock_bh(&old_meter->lock);
 422                err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 423                WARN_ON(err);
 424                spin_unlock_bh(&old_meter->lock);
 425                detach_meter(old_meter);
 426        }
 427        ovs_unlock();
 428        ovs_meter_free(old_meter);
 429        genlmsg_end(reply, ovs_reply_header);
 430        return genlmsg_reply(reply, info);
 431
 432exit_unlock:
 433        ovs_unlock();
 434        nlmsg_free(reply);
 435        return err;
 436}
 437
 438/* Meter action execution.
 439 *
 440 * Return true 'meter_id' drop band is triggered. The 'skb' should be
 441 * dropped by the caller'.
 442 */
 443bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 444                       struct sw_flow_key *key, u32 meter_id)
 445{
 446        struct dp_meter *meter;
 447        struct dp_meter_band *band;
 448        long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
 449        long long int long_delta_ms;
 450        u32 delta_ms;
 451        u32 cost;
 452        int i, band_exceeded_max = -1;
 453        u32 band_exceeded_rate = 0;
 454
 455        meter = lookup_meter(dp, meter_id);
 456        /* Do not drop the packet when there is no meter. */
 457        if (!meter)
 458                return false;
 459
 460        /* Lock the meter while using it. */
 461        spin_lock(&meter->lock);
 462
 463        long_delta_ms = (now_ms - meter->used); /* ms */
 464
 465        /* Make sure delta_ms will not be too large, so that bucket will not
 466         * wrap around below.
 467         */
 468        delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
 469                   ? meter->max_delta_t : (u32)long_delta_ms;
 470
 471        /* Update meter statistics.
 472         */
 473        meter->used = now_ms;
 474        meter->stats.n_packets += 1;
 475        meter->stats.n_bytes += skb->len;
 476
 477        /* Bucket rate is either in kilobits per second, or in packets per
 478         * second.  We maintain the bucket in the units of either bits or
 479         * 1/1000th of a packet, correspondingly.
 480         * Then, when rate is multiplied with milliseconds, we get the
 481         * bucket units:
 482         * msec * kbps = bits, and
 483         * msec * packets/sec = 1/1000 packets.
 484         *
 485         * 'cost' is the number of bucket units in this packet.
 486         */
 487        cost = (meter->kbps) ? skb->len * 8 : 1000;
 488
 489        /* Update all bands and find the one hit with the highest rate. */
 490        for (i = 0; i < meter->n_bands; ++i) {
 491                long long int max_bucket_size;
 492
 493                band = &meter->bands[i];
 494                max_bucket_size = (band->burst_size + band->rate) * 1000LL;
 495
 496                band->bucket += delta_ms * band->rate;
 497                if (band->bucket > max_bucket_size)
 498                        band->bucket = max_bucket_size;
 499
 500                if (band->bucket >= cost) {
 501                        band->bucket -= cost;
 502                } else if (band->rate > band_exceeded_rate) {
 503                        band_exceeded_rate = band->rate;
 504                        band_exceeded_max = i;
 505                }
 506        }
 507
 508        if (band_exceeded_max >= 0) {
 509                /* Update band statistics. */
 510                band = &meter->bands[band_exceeded_max];
 511                band->stats.n_packets += 1;
 512                band->stats.n_bytes += skb->len;
 513
 514                /* Drop band triggered, let the caller drop the 'skb'.  */
 515                if (band->type == OVS_METER_BAND_TYPE_DROP) {
 516                        spin_unlock(&meter->lock);
 517                        return true;
 518                }
 519        }
 520
 521        spin_unlock(&meter->lock);
 522        return false;
 523}
 524
 525static struct genl_ops dp_meter_genl_ops[] = {
 526        { .cmd = OVS_METER_CMD_FEATURES,
 527                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 528                .flags = 0,               /* OK for unprivileged users. */
 529                .doit = ovs_meter_cmd_features
 530        },
 531        { .cmd = OVS_METER_CMD_SET,
 532                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 533                .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 534                                           *  privilege.
 535                                           */
 536                .doit = ovs_meter_cmd_set,
 537        },
 538        { .cmd = OVS_METER_CMD_GET,
 539                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 540                .flags = 0,               /* OK for unprivileged users. */
 541                .doit = ovs_meter_cmd_get,
 542        },
 543        { .cmd = OVS_METER_CMD_DEL,
 544                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 545                .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
 546                                           *  privilege.
 547                                           */
 548                .doit = ovs_meter_cmd_del
 549        },
 550};
 551
 552static const struct genl_multicast_group ovs_meter_multicast_group = {
 553        .name = OVS_METER_MCGROUP,
 554};
 555
 556struct genl_family dp_meter_genl_family __ro_after_init = {
 557        .hdrsize = sizeof(struct ovs_header),
 558        .name = OVS_METER_FAMILY,
 559        .version = OVS_METER_VERSION,
 560        .maxattr = OVS_METER_ATTR_MAX,
 561        .policy = meter_policy,
 562        .netnsok = true,
 563        .parallel_ops = true,
 564        .ops = dp_meter_genl_ops,
 565        .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
 566        .mcgrps = &ovs_meter_multicast_group,
 567        .n_mcgrps = 1,
 568        .module = THIS_MODULE,
 569};
 570
 571int ovs_meters_init(struct datapath *dp)
 572{
 573        int i;
 574
 575        dp->meters = kmalloc_array(METER_HASH_BUCKETS,
 576                                   sizeof(struct hlist_head), GFP_KERNEL);
 577
 578        if (!dp->meters)
 579                return -ENOMEM;
 580
 581        for (i = 0; i < METER_HASH_BUCKETS; i++)
 582                INIT_HLIST_HEAD(&dp->meters[i]);
 583
 584        return 0;
 585}
 586
 587void ovs_meters_exit(struct datapath *dp)
 588{
 589        int i;
 590
 591        for (i = 0; i < METER_HASH_BUCKETS; i++) {
 592                struct hlist_head *head = &dp->meters[i];
 593                struct dp_meter *meter;
 594                struct hlist_node *n;
 595
 596                hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
 597                        kfree(meter);
 598        }
 599
 600        kfree(dp->meters);
 601}
 602