linux/net/openvswitch/datapath.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};
/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath or port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return ovs_vport_name(vport);
}

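/* Return the ifindex of the local port's device, or 0 if there is none.
 * This ifindex is what identifies the datapath (dp_ifindex) in Netlink
 * messages. */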
static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

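/* RCU callback that frees a datapath's resources once all RCU readers that
 * might still see it have finished. */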
static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        ovs_meters_exit(dp);
        kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

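/* Software-segment a GSO packet and queue each resulting segment to
 * userspace individually.  For UDP fragments, segments after the first are
 * re-keyed as OVS_FRAG_TYPE_LATER, since the extracted key only describes
 * the first fragment. */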
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info,
                             uint32_t cutlen)
{
        unsigned int gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb = segs;
        do {
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

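/* Compute the size of the Netlink message needed for an upcall, given the
 * packet header length and the optional attributes requested in
 * 'upcall_info'. */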
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen, int actions_attrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
                + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(actions_attrlen);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

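/* Zero-pad the message tail out to NLA_ALIGNTO unless userspace has declared
 * (via OVS_DP_F_UNALIGNED) that it can handle unaligned Netlink messages. */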
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        skb_put_zero(skb, plen);
        }
}

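/* Build the Netlink message for a single upcall and unicast it to the upcall
 * portid.  The message carries the flow key, any optional attributes from
 * 'upcall_info' and the packet data itself, which is appended with
 * skb_zerocopy() and truncated by 'cutlen' bytes when requested. */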
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_csum_hwoffload_help(skb, 0)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        if (err)
                goto out;

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start_noflag(user_skb,
                                            OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                if (err)
                        goto out;

                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru) {
                if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
                                upcall_info->mru)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0) {
                if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
                                skb->len)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

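/* OVS_PACKET_CMD_EXECUTE handler: rebuild a packet from the Netlink
 * attributes supplied by userspace, construct a matching sw_flow for it and
 * execute the supplied actions as if the packet had arrived on the given
 * input vport. */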
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};

static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .policy = packet_policy,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .module = THIS_MODULE,
};

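/* Aggregate datapath statistics across all possible CPUs.  Each per-CPU
 * counter set is snapshotted with the u64_stats fetch/retry loop so 64-bit
 * values read consistently on 32-bit hosts. */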
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

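/* Upper bound on the Netlink message size needed for a flow reply, taking
 * the OVS_UFID_F_OMIT_* flags into account. */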
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                              OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
                          sizeof(struct ovs_flow_stats), &stats,
                          OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

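/* OVS_FLOW_CMD_NEW handler: allocate a flow, extract its key, identifier and
 * actions from the request, then insert it into the flow table.  If an
 * equivalent flow already exists, the request is instead treated as an
 * update of that flow's actions, unless NLM_F_CREATE/NLM_F_EXCL forbids
 * it. */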
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &new_flow->key, false, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &new_flow->key, log);
        if (error)
                goto err_kfree_flow;

        /* unmasked key is needed to match when ufid is not used. */
        if (ovs_identifier_is_key(&new_flow->id))
                match.key = new_flow->id.unmasked_key;

        ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
                                                const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because mask is only
 * used to get actions, we use a separate function to save some
 * stack space.
 *
 * If there are no key and action attrs, we return 0
 * directly. In that case, the caller will also not use the
 * match as before. If there is an action attr, we try to get
 * actions and save them to *acts. Before returning from
 * the function, we reset the match->mask pointer, because we
 * should not return a match object with a dangling reference
 * to its mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
                              struct sw_flow_match *match,
                              struct sw_flow_key *key,
                              struct nlattr **a,
                              struct sw_flow_actions **acts,
                              bool log)
{
        struct sw_flow_mask mask;
        int error = 0;

        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(match, key, true, &mask);
                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
                                          a[OVS_FLOW_ATTR_MASK], log);
                if (error)
                        goto error;
        }

        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                if (!a[OVS_FLOW_ATTR_KEY]) {
                        OVS_NLERR(log,
                                  "Flow key attribute not present in set flow.");
                        error = -EINVAL;
                        goto error;
                }

                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
                                         &mask, log);
                if (IS_ERR(*acts)) {
                        error = PTR_ERR(*acts);
                        goto error;
                }
        }

        /* On success, error is 0. */
error:
        match->mask = NULL;
        return error;
}

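/* OVS_FLOW_CMD_SET handler: look up an existing flow by UFID or key, then
 * replace its actions and/or clear its statistics. */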
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
                OVS_NLERR(log,
                          "Flow set message rejected, Key attribute missing.");
                return -EINVAL;
        }

        error = ovs_nla_init_match_and_action(net, &match, &key, a,
                                              &acts, log);
        if (error)
                goto error;

        if (acts) {
                /* Can allocate before locking if have acts. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_SET,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_SET, false,
                                                ufid_flags);

                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions_rcu(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
error:
        return error;
}

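/* OVS_FLOW_CMD_GET handler: look up a flow by UFID or key and reply with its
 * current key, mask, stats and actions. */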
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_GET, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

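/* OVS_FLOW_CMD_DEL handler: delete the flow identified by UFID or key, or
 * flush the entire flow table if neither identifier is given. */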
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                        NULL, log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                                        &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (!IS_ERR(reply)) {
                        rcu_read_lock();        /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

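/* Flow table dump callback.  Iterates the table bucket by bucket, stashing
 * the current (bucket, entry) position in cb->args so the dump can resume
 * where it left off on the next invocation. */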
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
                                       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_GET, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = 0,               /* OK for unprivileged users. */
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .policy = flow_policy,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_flow_genl_ops,
        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
        .module = THIS_MODULE,
};

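/* Worst-case size of a datapath reply message: header plus the name, stats,
 * megaflow-stats and user-features attributes. */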
1456static size_t ovs_dp_cmd_msg_size(void)
1457{
1458        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1459
1460        msgsize += nla_total_size(IFNAMSIZ);
1461        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1462        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1463        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1464
1465        return msgsize;
1466}
1467
1468/* Called with ovs_mutex. */
1469static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1470                                u32 portid, u32 seq, u32 flags, u8 cmd)
1471{
1472        struct ovs_header *ovs_header;
1473        struct ovs_dp_stats dp_stats;
1474        struct ovs_dp_megaflow_stats dp_megaflow_stats;
1475        int err;
1476
1477        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1478                                   flags, cmd);
1479        if (!ovs_header)
1480                goto error;
1481
1482        ovs_header->dp_ifindex = get_dpifindex(dp);
1483
1484        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1485        if (err)
1486                goto nla_put_failure;
1487
1488        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1489        if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1490                          &dp_stats, OVS_DP_ATTR_PAD))
1491                goto nla_put_failure;
1492
1493        if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1494                          sizeof(struct ovs_dp_megaflow_stats),
1495                          &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1496                goto nla_put_failure;
1497
1498        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1499                goto nla_put_failure;
1500
1501        genlmsg_end(skb, ovs_header);
1502        return 0;
1503
1504nla_put_failure:
1505        genlmsg_cancel(skb, ovs_header);
1506error:
1507        return -EMSGSIZE;
1508}
1509
1510static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1511{
1512        return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1513}
1514
1515/* Called with rcu_read_lock or ovs_mutex. */
1516static struct datapath *lookup_datapath(struct net *net,
1517                                        const struct ovs_header *ovs_header,
1518                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1519{
1520        struct datapath *dp;
1521
1522        if (!a[OVS_DP_ATTR_NAME])
1523                dp = get_dp(net, ovs_header->dp_ifindex);
1524        else {
1525                struct vport *vport;
1526
1527                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1528                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1529        }
1530        return dp ? dp : ERR_PTR(-ENODEV);
1531}
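
/*
 * Addressing sketch (illustrative): a request may target a datapath either
 * by name or by the ifindex of its local port, so both of these userspace
 * messages would reach the same datapath:
 *
 *        OVS_DP_CMD_GET with OVS_DP_ATTR_NAME = "dp0"
 *        OVS_DP_CMD_GET with ovs_header.dp_ifindex = <ifindex of "dp0">
 *
 * "dp0" is a hypothetical name; the NAME lookup only succeeds when the
 * named vport is the datapath's OVSP_LOCAL port.
 */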
1532
1533static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1534{
1535        struct datapath *dp;
1536
1537        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1538        if (IS_ERR(dp))
1539                return;
1540
1541        WARN(dp->user_features, "Dropping previously announced user features\n");
1542        dp->user_features = 0;
1543}
1544
1545static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1546{
1547        if (a[OVS_DP_ATTR_USER_FEATURES])
1548                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1549}
1550
1551static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1552{
1553        struct nlattr **a = info->attrs;
1554        struct vport_parms parms;
1555        struct sk_buff *reply;
1556        struct datapath *dp;
1557        struct vport *vport;
1558        struct ovs_net *ovs_net;
1559        int err, i;
1560
1561        err = -EINVAL;
1562        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1563                goto err;
1564
1565        reply = ovs_dp_cmd_alloc_info();
1566        if (!reply)
1567                return -ENOMEM;
1568
1569        err = -ENOMEM;
1570        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1571        if (dp == NULL)
1572                goto err_free_reply;
1573
1574        ovs_dp_set_net(dp, sock_net(skb->sk));
1575
1576        /* Allocate the flow table. */
1577        err = ovs_flow_tbl_init(&dp->table);
1578        if (err)
1579                goto err_free_dp;
1580
1581        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1582        if (!dp->stats_percpu) {
1583                err = -ENOMEM;
1584                goto err_destroy_table;
1585        }
1586
1587        dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1588                                  sizeof(struct hlist_head),
1589                                  GFP_KERNEL);
1590        if (!dp->ports) {
1591                err = -ENOMEM;
1592                goto err_destroy_percpu;
1593        }
1594
1595        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1596                INIT_HLIST_HEAD(&dp->ports[i]);
1597
1598        err = ovs_meters_init(dp);
1599        if (err)
1600                goto err_destroy_ports_array;
1601
1602        /* Set up our datapath device. */
1603        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1604        parms.type = OVS_VPORT_TYPE_INTERNAL;
1605        parms.options = NULL;
1606        parms.dp = dp;
1607        parms.port_no = OVSP_LOCAL;
1608        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1609
1610        ovs_dp_change(dp, a);
1611
1612        /* So far only local changes have been made; now we need the lock. */
1613        ovs_lock();
1614
1615        vport = new_vport(&parms);
1616        if (IS_ERR(vport)) {
1617                err = PTR_ERR(vport);
1618                if (err == -EBUSY)
1619                        err = -EEXIST;
1620
1621                if (err == -EEXIST) {
1622                        /* An outdated user space instance that does not understand
1623                         * the concept of user_features has attempted to create a new
1624                         * datapath and is likely to reuse it. Drop all user features.
1625                         */
1626                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1627                                ovs_dp_reset_user_features(skb, info);
1628                }
1629
1630                goto err_destroy_meters;
1631        }
1632
1633        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1634                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1635        BUG_ON(err < 0);
1636
1637        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1638        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1639
1640        ovs_unlock();
1641
1642        ovs_notify(&dp_datapath_genl_family, reply, info);
1643        return 0;
1644
1645err_destroy_meters:
1646        ovs_unlock();
1647        ovs_meters_exit(dp);
1648err_destroy_ports_array:
1649        kfree(dp->ports);
1650err_destroy_percpu:
1651        free_percpu(dp->stats_percpu);
1652err_destroy_table:
1653        ovs_flow_tbl_destroy(&dp->table);
1654err_free_dp:
1655        kfree(dp);
1656err_free_reply:
1657        kfree_skb(reply);
1658err:
1659        return err;
1660}
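
/*
 * Minimal request sketch for the command above (illustrative): creating a
 * datapath requires exactly the two attributes checked at the top of
 * ovs_dp_cmd_new(); everything else is optional.
 *
 *        OVS_DP_CMD_NEW
 *          OVS_DP_ATTR_NAME          = "dp0"    (hypothetical name)
 *          OVS_DP_ATTR_UPCALL_PID    = <netlink portid for upcalls>
 *          OVS_DP_ATTR_USER_FEATURES = ...      (optional, see ovs_dp_change)
 */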
1661
1662/* Called with ovs_mutex. */
1663static void __dp_destroy(struct datapath *dp)
1664{
1665        int i;
1666
1667        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1668                struct vport *vport;
1669                struct hlist_node *n;
1670
1671                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1672                        if (vport->port_no != OVSP_LOCAL)
1673                                ovs_dp_detach_port(vport);
1674        }
1675
1676        list_del_rcu(&dp->list_node);
1677
1678        /* OVSP_LOCAL is the datapath's internal port.  All other ports must be
1679         * destroyed before the datapath itself can be freed.
1680         */
1681        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1682
1683        /* Destroy the flow table after an RCU grace period. */
1684        call_rcu(&dp->rcu, destroy_dp_rcu);
1685}
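
/*
 * Teardown-ordering sketch (illustrative): readers may still be traversing
 * the flow table under rcu_read_lock() when __dp_destroy() returns, which
 * is why the final free is deferred:
 *
 *        call_rcu(&dp->rcu, destroy_dp_rcu);   // runs after a grace period
 */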
1686
1687static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1688{
1689        struct sk_buff *reply;
1690        struct datapath *dp;
1691        int err;
1692
1693        reply = ovs_dp_cmd_alloc_info();
1694        if (!reply)
1695                return -ENOMEM;
1696
1697        ovs_lock();
1698        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1699        err = PTR_ERR(dp);
1700        if (IS_ERR(dp))
1701                goto err_unlock_free;
1702
1703        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1704                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
1705        BUG_ON(err < 0);
1706
1707        __dp_destroy(dp);
1708        ovs_unlock();
1709
1710        ovs_notify(&dp_datapath_genl_family, reply, info);
1711
1712        return 0;
1713
1714err_unlock_free:
1715        ovs_unlock();
1716        kfree_skb(reply);
1717        return err;
1718}
1719
1720static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1721{
1722        struct sk_buff *reply;
1723        struct datapath *dp;
1724        int err;
1725
1726        reply = ovs_dp_cmd_alloc_info();
1727        if (!reply)
1728                return -ENOMEM;
1729
1730        ovs_lock();
1731        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1732        err = PTR_ERR(dp);
1733        if (IS_ERR(dp))
1734                goto err_unlock_free;
1735
1736        ovs_dp_change(dp, info->attrs);
1737
1738        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1739                                   info->snd_seq, 0, OVS_DP_CMD_SET);
1740        BUG_ON(err < 0);
1741
1742        ovs_unlock();
1743        ovs_notify(&dp_datapath_genl_family, reply, info);
1744
1745        return 0;
1746
1747err_unlock_free:
1748        ovs_unlock();
1749        kfree_skb(reply);
1750        return err;
1751}
1752
1753static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1754{
1755        struct sk_buff *reply;
1756        struct datapath *dp;
1757        int err;
1758
1759        reply = ovs_dp_cmd_alloc_info();
1760        if (!reply)
1761                return -ENOMEM;
1762
1763        ovs_lock();
1764        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1765        if (IS_ERR(dp)) {
1766                err = PTR_ERR(dp);
1767                goto err_unlock_free;
1768        }
1769        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1770                                   info->snd_seq, 0, OVS_DP_CMD_GET);
1771        BUG_ON(err < 0);
1772        ovs_unlock();
1773
1774        return genlmsg_reply(reply, info);
1775
1776err_unlock_free:
1777        ovs_unlock();
1778        kfree_skb(reply);
1779        return err;
1780}
1781
1782static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1783{
1784        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1785        struct datapath *dp;
1786        int skip = cb->args[0];
1787        int i = 0;
1788
1789        ovs_lock();
1790        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1791                if (i >= skip &&
1792                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1793                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1794                                         OVS_DP_CMD_GET) < 0)
1795                        break;
1796                i++;
1797        }
1798        ovs_unlock();
1799
1800        cb->args[0] = i;
1801
1802        return skb->len;
1803}
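
/*
 * Dump-cursor sketch (illustrative): cb->args[0] stores how many datapaths
 * have already been emitted, so a dump interrupted after two entries
 * resumes like this on the next invocation:
 *
 *        skip = cb->args[0];   // == 2
 *        // entries 0 and 1 are skipped, emission restarts at entry 2
 */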
1804
1805static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1806        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1807        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1808        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1809};
1810
1811static const struct genl_ops dp_datapath_genl_ops[] = {
1812        { .cmd = OVS_DP_CMD_NEW,
1813          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1814          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1815          .doit = ovs_dp_cmd_new
1816        },
1817        { .cmd = OVS_DP_CMD_DEL,
1818          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1819          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1820          .doit = ovs_dp_cmd_del
1821        },
1822        { .cmd = OVS_DP_CMD_GET,
1823          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1824          .flags = 0,               /* OK for unprivileged users. */
1825          .doit = ovs_dp_cmd_get,
1826          .dumpit = ovs_dp_cmd_dump
1827        },
1828        { .cmd = OVS_DP_CMD_SET,
1829          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1830          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1831          .doit = ovs_dp_cmd_set,
1832        },
1833};
1834
1835static struct genl_family dp_datapath_genl_family __ro_after_init = {
1836        .hdrsize = sizeof(struct ovs_header),
1837        .name = OVS_DATAPATH_FAMILY,
1838        .version = OVS_DATAPATH_VERSION,
1839        .maxattr = OVS_DP_ATTR_MAX,
1840        .policy = datapath_policy,
1841        .netnsok = true,
1842        .parallel_ops = true,
1843        .ops = dp_datapath_genl_ops,
1844        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1845        .mcgrps = &ovs_dp_datapath_multicast_group,
1846        .n_mcgrps = 1,
1847        .module = THIS_MODULE,
1848};
1849
1850/* Called with ovs_mutex or RCU read lock. */
1851static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1852                                   struct net *net, u32 portid, u32 seq,
1853                                   u32 flags, u8 cmd)
1854{
1855        struct ovs_header *ovs_header;
1856        struct ovs_vport_stats vport_stats;
1857        int err;
1858
1859        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1860                                 flags, cmd);
1861        if (!ovs_header)
1862                return -EMSGSIZE;
1863
1864        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1865
1866        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1867            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1868            nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1869                           ovs_vport_name(vport)) ||
1870            nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
1871                goto nla_put_failure;
1872
1873        if (!net_eq(net, dev_net(vport->dev))) {
1874                int id = peernet2id_alloc(net, dev_net(vport->dev));
1875
1876                if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
1877                        goto nla_put_failure;
1878        }
1879
1880        ovs_vport_get_stats(vport, &vport_stats);
1881        if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1882                          sizeof(struct ovs_vport_stats), &vport_stats,
1883                          OVS_VPORT_ATTR_PAD))
1884                goto nla_put_failure;
1885
1886        if (ovs_vport_get_upcall_portids(vport, skb))
1887                goto nla_put_failure;
1888
1889        err = ovs_vport_get_options(vport, skb);
1890        if (err == -EMSGSIZE)
1891                goto error;
1892
1893        genlmsg_end(skb, ovs_header);
1894        return 0;
1895
1896nla_put_failure:
1897        err = -EMSGSIZE;
1898error:
1899        genlmsg_cancel(skb, ovs_header);
1900        return err;
1901}
1902
1903static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1904{
1905        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1906}
1907
1908/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1909struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
1910                                         u32 portid, u32 seq, u8 cmd)
1911{
1912        struct sk_buff *skb;
1913        int retval;
1914
1915        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1916        if (!skb)
1917                return ERR_PTR(-ENOMEM);
1918
1919        retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
1920        BUG_ON(retval < 0);
1921
1922        return skb;
1923}
1924
1925/* Called with ovs_mutex or RCU read lock. */
1926static struct vport *lookup_vport(struct net *net,
1927                                  const struct ovs_header *ovs_header,
1928                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1929{
1930        struct datapath *dp;
1931        struct vport *vport;
1932
1933        if (a[OVS_VPORT_ATTR_IFINDEX])
1934                return ERR_PTR(-EOPNOTSUPP);
1935        if (a[OVS_VPORT_ATTR_NAME]) {
1936                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1937                if (!vport)
1938                        return ERR_PTR(-ENODEV);
1939                if (ovs_header->dp_ifindex &&
1940                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1941                        return ERR_PTR(-ENODEV);
1942                return vport;
1943        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1944                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1945
1946                if (port_no >= DP_MAX_PORTS)
1947                        return ERR_PTR(-EFBIG);
1948
1949                dp = get_dp(net, ovs_header->dp_ifindex);
1950                if (!dp)
1951                        return ERR_PTR(-ENODEV);
1952
1953                vport = ovs_vport_ovsl_rcu(dp, port_no);
1954                if (!vport)
1955                        return ERR_PTR(-ENODEV);
1956                return vport;
1957        } else {
1958                return ERR_PTR(-EINVAL);
1959        }
1960}
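
/*
 * Lookup precedence sketch (illustrative): OVS_VPORT_ATTR_IFINDEX is
 * rejected outright, a name wins over a port number, and a port number is
 * only meaningful relative to ovs_header->dp_ifindex ("vport1" is a
 * hypothetical name):
 *
 *        { IFINDEX }          -> -EOPNOTSUPP
 *        { NAME = "vport1" }  -> global lookup, dp_ifindex cross-checked
 *        { PORT_NO = 5 }      -> lookup within the datapath from dp_ifindex
 *        { }                  -> -EINVAL
 */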
1961
1962static unsigned int ovs_get_max_headroom(struct datapath *dp)
1963{
1964        unsigned int dev_headroom, max_headroom = 0;
1965        struct net_device *dev;
1966        struct vport *vport;
1967        int i;
1968
1969        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1970                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1971                        dev = vport->dev;
1972                        dev_headroom = netdev_get_fwd_headroom(dev);
1973                        if (dev_headroom > max_headroom)
1974                                max_headroom = dev_headroom;
1975                }
1976        }
1977
1978        return max_headroom;
1979}
1980
1981/* Called with ovs_mutex. */
1982static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
1983{
1984        struct vport *vport;
1985        int i;
1986
1987        dp->max_headroom = new_headroom;
1988        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1989                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1990                        netdev_set_rx_headroom(vport->dev, new_headroom);
1991}
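
/*
 * Headroom propagation sketch (illustrative): dp->max_headroom tracks the
 * largest forward headroom needed by any attached device, so attaching a
 * device that needs more room (e.g. a tunnel) grows every port at once:
 *
 *        if (netdev_get_fwd_headroom(new_dev) > dp->max_headroom)
 *                ovs_update_headroom(dp, netdev_get_fwd_headroom(new_dev));
 *
 * which mirrors what ovs_vport_cmd_new() does below.
 */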
1992
1993static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1994{
1995        struct nlattr **a = info->attrs;
1996        struct ovs_header *ovs_header = info->userhdr;
1997        struct vport_parms parms;
1998        struct sk_buff *reply;
1999        struct vport *vport;
2000        struct datapath *dp;
2001        unsigned int new_headroom;
2002        u32 port_no;
2003        int err;
2004
2005        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2006            !a[OVS_VPORT_ATTR_UPCALL_PID])
2007                return -EINVAL;
2008        if (a[OVS_VPORT_ATTR_IFINDEX])
2009                return -EOPNOTSUPP;
2010
2011        port_no = a[OVS_VPORT_ATTR_PORT_NO]
2012                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2013        if (port_no >= DP_MAX_PORTS)
2014                return -EFBIG;
2015
2016        reply = ovs_vport_cmd_alloc_info();
2017        if (!reply)
2018                return -ENOMEM;
2019
2020        ovs_lock();
2021restart:
2022        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2023        err = -ENODEV;
2024        if (!dp)
2025                goto exit_unlock_free;
2026
2027        if (port_no) {
2028                vport = ovs_vport_ovsl(dp, port_no);
2029                err = -EBUSY;
2030                if (vport)
2031                        goto exit_unlock_free;
2032        } else {
2033                for (port_no = 1; ; port_no++) {
2034                        if (port_no >= DP_MAX_PORTS) {
2035                                err = -EFBIG;
2036                                goto exit_unlock_free;
2037                        }
2038                        vport = ovs_vport_ovsl(dp, port_no);
2039                        if (!vport)
2040                                break;
2041                }
2042        }
2043
2044        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2045        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2046        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2047        parms.dp = dp;
2048        parms.port_no = port_no;
2049        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2050
2051        vport = new_vport(&parms);
2052        err = PTR_ERR(vport);
2053        if (IS_ERR(vport)) {
2054                if (err == -EAGAIN)
2055                        goto restart;
2056                goto exit_unlock_free;
2057        }
2058
2059        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2060                                      info->snd_portid, info->snd_seq, 0,
2061                                      OVS_VPORT_CMD_NEW);
2062
2063        new_headroom = netdev_get_fwd_headroom(vport->dev);
2064
2065        if (new_headroom > dp->max_headroom)
2066                ovs_update_headroom(dp, new_headroom);
2067        else
2068                netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2069
2070        BUG_ON(err < 0);
2071        ovs_unlock();
2072
2073        ovs_notify(&dp_vport_genl_family, reply, info);
2074        return 0;
2075
2076exit_unlock_free:
2077        ovs_unlock();
2078        kfree_skb(reply);
2079        return err;
2080}
2081
2082static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2083{
2084        struct nlattr **a = info->attrs;
2085        struct sk_buff *reply;
2086        struct vport *vport;
2087        int err;
2088
2089        reply = ovs_vport_cmd_alloc_info();
2090        if (!reply)
2091                return -ENOMEM;
2092
2093        ovs_lock();
2094        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2095        err = PTR_ERR(vport);
2096        if (IS_ERR(vport))
2097                goto exit_unlock_free;
2098
2099        if (a[OVS_VPORT_ATTR_TYPE] &&
2100            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2101                err = -EINVAL;
2102                goto exit_unlock_free;
2103        }
2104
2105        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2106                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2107                if (err)
2108                        goto exit_unlock_free;
2109        }
2110
2112        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2113                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2114
2115                err = ovs_vport_set_upcall_portids(vport, ids);
2116                if (err)
2117                        goto exit_unlock_free;
2118        }
2119
2120        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2121                                      info->snd_portid, info->snd_seq, 0,
2122                                      OVS_VPORT_CMD_SET);
2123        BUG_ON(err < 0);
2124
2125        ovs_unlock();
2126        ovs_notify(&dp_vport_genl_family, reply, info);
2127        return 0;
2128
2129exit_unlock_free:
2130        ovs_unlock();
2131        kfree_skb(reply);
2132        return err;
2133}
2134
2135static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2136{
2137        bool update_headroom = false;
2138        struct nlattr **a = info->attrs;
2139        struct sk_buff *reply;
2140        struct datapath *dp;
2141        struct vport *vport;
2142        unsigned int new_headroom;
2143        int err;
2144
2145        reply = ovs_vport_cmd_alloc_info();
2146        if (!reply)
2147                return -ENOMEM;
2148
2149        ovs_lock();
2150        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2151        err = PTR_ERR(vport);
2152        if (IS_ERR(vport))
2153                goto exit_unlock_free;
2154
2155        if (vport->port_no == OVSP_LOCAL) {
2156                err = -EINVAL;
2157                goto exit_unlock_free;
2158        }
2159
2160        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2161                                      info->snd_portid, info->snd_seq, 0,
2162                                      OVS_VPORT_CMD_DEL);
2163        BUG_ON(err < 0);
2164
2165        /* The vport deletion may require a datapath headroom update. */
2166        dp = vport->dp;
2167        if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2168                update_headroom = true;
2169
2170        netdev_reset_rx_headroom(vport->dev);
2171        ovs_dp_detach_port(vport);
2172
2173        if (update_headroom) {
2174                new_headroom = ovs_get_max_headroom(dp);
2175
2176                if (new_headroom < dp->max_headroom)
2177                        ovs_update_headroom(dp, new_headroom);
2178        }
2179        ovs_unlock();
2180
2181        ovs_notify(&dp_vport_genl_family, reply, info);
2182        return 0;
2183
2184exit_unlock_free:
2185        ovs_unlock();
2186        kfree_skb(reply);
2187        return err;
2188}
2189
2190static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2191{
2192        struct nlattr **a = info->attrs;
2193        struct ovs_header *ovs_header = info->userhdr;
2194        struct sk_buff *reply;
2195        struct vport *vport;
2196        int err;
2197
2198        reply = ovs_vport_cmd_alloc_info();
2199        if (!reply)
2200                return -ENOMEM;
2201
2202        rcu_read_lock();
2203        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2204        err = PTR_ERR(vport);
2205        if (IS_ERR(vport))
2206                goto exit_unlock_free;
2207        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2208                                      info->snd_portid, info->snd_seq, 0,
2209                                      OVS_VPORT_CMD_GET);
2210        BUG_ON(err < 0);
2211        rcu_read_unlock();
2212
2213        return genlmsg_reply(reply, info);
2214
2215exit_unlock_free:
2216        rcu_read_unlock();
2217        kfree_skb(reply);
2218        return err;
2219}
2220
2221static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2222{
2223        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2224        struct datapath *dp;
2225        int bucket = cb->args[0], skip = cb->args[1];
2226        int i, j = 0;
2227
2228        rcu_read_lock();
2229        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2230        if (!dp) {
2231                rcu_read_unlock();
2232                return -ENODEV;
2233        }
2234        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2235                struct vport *vport;
2236
2237                j = 0;
2238                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2239                        if (j >= skip &&
2240                            ovs_vport_cmd_fill_info(vport, skb,
2241                                                    sock_net(skb->sk),
2242                                                    NETLINK_CB(cb->skb).portid,
2243                                                    cb->nlh->nlmsg_seq,
2244                                                    NLM_F_MULTI,
2245                                                    OVS_VPORT_CMD_GET) < 0)
2246                                goto out;
2247
2248                        j++;
2249                }
2250                skip = 0;
2251        }
2252out:
2253        rcu_read_unlock();
2254
2255        cb->args[0] = i;
2256        cb->args[1] = j;
2257
2258        return skb->len;
2259}
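
/*
 * Two-level cursor sketch (illustrative): vport dumps resume using both a
 * hash bucket index and an offset within that bucket:
 *
 *        bucket = cb->args[0];   // hash bucket to continue from
 *        skip   = cb->args[1];   // entries already emitted in that bucket
 *
 * A dump that stopped at the third vport of bucket 7 resumes with
 * args = { 7, 3 } and skips forward accordingly.
 */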
2260
2261static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2262        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2263        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2264        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2265        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2266        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2267        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2268        [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2269        [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2270};
2271
2272static const struct genl_ops dp_vport_genl_ops[] = {
2273        { .cmd = OVS_VPORT_CMD_NEW,
2274          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2275          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2276          .doit = ovs_vport_cmd_new
2277        },
2278        { .cmd = OVS_VPORT_CMD_DEL,
2279          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2280          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2281          .doit = ovs_vport_cmd_del
2282        },
2283        { .cmd = OVS_VPORT_CMD_GET,
2284          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2285          .flags = 0,               /* OK for unprivileged users. */
2286          .doit = ovs_vport_cmd_get,
2287          .dumpit = ovs_vport_cmd_dump
2288        },
2289        { .cmd = OVS_VPORT_CMD_SET,
2290          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2291          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2292          .doit = ovs_vport_cmd_set,
2293        },
2294};
2295
2296struct genl_family dp_vport_genl_family __ro_after_init = {
2297        .hdrsize = sizeof(struct ovs_header),
2298        .name = OVS_VPORT_FAMILY,
2299        .version = OVS_VPORT_VERSION,
2300        .maxattr = OVS_VPORT_ATTR_MAX,
2301        .policy = vport_policy,
2302        .netnsok = true,
2303        .parallel_ops = true,
2304        .ops = dp_vport_genl_ops,
2305        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2306        .mcgrps = &ovs_dp_vport_multicast_group,
2307        .n_mcgrps = 1,
2308        .module = THIS_MODULE,
2309};
2310
2311static struct genl_family * const dp_genl_families[] = {
2312        &dp_datapath_genl_family,
2313        &dp_vport_genl_family,
2314        &dp_flow_genl_family,
2315        &dp_packet_genl_family,
2316        &dp_meter_genl_family,
2317#if     IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2318        &dp_ct_limit_genl_family,
2319#endif
2320};
2321
2322static void dp_unregister_genl(int n_families)
2323{
2324        int i;
2325
2326        for (i = 0; i < n_families; i++)
2327                genl_unregister_family(dp_genl_families[i]);
2328}
2329
2330static int __init dp_register_genl(void)
2331{
2332        int err;
2333        int i;
2334
2335        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2337                err = genl_register_family(dp_genl_families[i]);
2338                if (err)
2339                        goto error;
2340        }
2341
2342        return 0;
2343
2344error:
2345        dp_unregister_genl(i);
2346        return err;
2347}
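
/*
 * Unwind sketch (illustrative): if registering family i fails, only the i
 * families already registered are torn down, e.g. a failure on the third
 * family leaves the rest of the array untouched:
 *
 *        err = genl_register_family(dp_genl_families[2]);   // fails
 *        dp_unregister_genl(2);   // unregisters families 0 and 1
 */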
2348
2349static int __net_init ovs_init_net(struct net *net)
2350{
2351        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2352
2353        INIT_LIST_HEAD(&ovs_net->dps);
2354        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2355        return ovs_ct_init(net);
2356}
2357
2358static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2359                                            struct list_head *head)
2360{
2361        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2362        struct datapath *dp;
2363
2364        list_for_each_entry(dp, &ovs_net->dps, list_node) {
2365                int i;
2366
2367                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2368                        struct vport *vport;
2369
2370                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2371                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2372                                        continue;
2373
2374                                if (dev_net(vport->dev) == dnet)
2375                                        list_add(&vport->detach_list, head);
2376                        }
2377                }
2378        }
2379}
2380
2381static void __net_exit ovs_exit_net(struct net *dnet)
2382{
2383        struct datapath *dp, *dp_next;
2384        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2385        struct vport *vport, *vport_next;
2386        struct net *net;
2387        LIST_HEAD(head);
2388
2389        ovs_ct_exit(dnet);
2390        ovs_lock();
2391        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2392                __dp_destroy(dp);
2393
2394        down_read(&net_rwsem);
2395        for_each_net(net)
2396                list_vports_from_net(net, dnet, &head);
2397        up_read(&net_rwsem);
2398
2399        /* Detach all internal vports whose device lives in the exiting namespace. */
2400        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2401                list_del(&vport->detach_list);
2402                ovs_dp_detach_port(vport);
2403        }
2404
2405        ovs_unlock();
2406
2407        cancel_work_sync(&ovs_net->dp_notify_work);
2408}
2409
2410static struct pernet_operations ovs_net_ops = {
2411        .init = ovs_init_net,
2412        .exit = ovs_exit_net,
2413        .id   = &ovs_net_id,
2414        .size = sizeof(struct ovs_net),
2415};
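
/*
 * Per-netns storage sketch (illustrative): because ovs_net_ops supplies
 * both .id and .size, the core allocates a zeroed struct ovs_net for each
 * namespace, and any code can retrieve it with the saved id:
 *
 *        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 */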
2416
2417static int __init dp_init(void)
2418{
2419        int err;
2420
2421        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2422
2423        pr_info("Open vSwitch switching datapath\n");
2424
2425        err = action_fifos_init();
2426        if (err)
2427                goto error;
2428
2429        err = ovs_internal_dev_rtnl_link_register();
2430        if (err)
2431                goto error_action_fifos_exit;
2432
2433        err = ovs_flow_init();
2434        if (err)
2435                goto error_unreg_rtnl_link;
2436
2437        err = ovs_vport_init();
2438        if (err)
2439                goto error_flow_exit;
2440
2441        err = register_pernet_device(&ovs_net_ops);
2442        if (err)
2443                goto error_vport_exit;
2444
2445        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2446        if (err)
2447                goto error_netns_exit;
2448
2449        err = ovs_netdev_init();
2450        if (err)
2451                goto error_unreg_notifier;
2452
2453        err = dp_register_genl();
2454        if (err < 0)
2455                goto error_unreg_netdev;
2456
2457        return 0;
2458
2459error_unreg_netdev:
2460        ovs_netdev_exit();
2461error_unreg_notifier:
2462        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2463error_netns_exit:
2464        unregister_pernet_device(&ovs_net_ops);
2465error_vport_exit:
2466        ovs_vport_exit();
2467error_flow_exit:
2468        ovs_flow_exit();
2469error_unreg_rtnl_link:
2470        ovs_internal_dev_rtnl_link_unregister();
2471error_action_fifos_exit:
2472        action_fifos_exit();
2473error:
2474        return err;
2475}
2476
2477static void dp_cleanup(void)
2478{
2479        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2480        ovs_netdev_exit();
2481        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2482        unregister_pernet_device(&ovs_net_ops);
2483        rcu_barrier();
2484        ovs_vport_exit();
2485        ovs_flow_exit();
2486        ovs_internal_dev_rtnl_link_unregister();
2487        action_fifos_exit();
2488}
2489
2490module_init(dp_init);
2491module_exit(dp_cleanup);
2492
2493MODULE_DESCRIPTION("Open vSwitch switching datapath");
2494MODULE_LICENSE("GPL");
2495MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2496MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2497MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2498MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2499MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2500MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2501