linux/net/openvswitch/datapath.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2007-2014 Nicira, Inc.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include <linux/init.h>
   9#include <linux/module.h>
  10#include <linux/if_arp.h>
  11#include <linux/if_vlan.h>
  12#include <linux/in.h>
  13#include <linux/ip.h>
  14#include <linux/jhash.h>
  15#include <linux/delay.h>
  16#include <linux/time.h>
  17#include <linux/etherdevice.h>
  18#include <linux/genetlink.h>
  19#include <linux/kernel.h>
  20#include <linux/kthread.h>
  21#include <linux/mutex.h>
  22#include <linux/percpu.h>
  23#include <linux/rcupdate.h>
  24#include <linux/tcp.h>
  25#include <linux/udp.h>
  26#include <linux/ethtool.h>
  27#include <linux/wait.h>
  28#include <asm/div64.h>
  29#include <linux/highmem.h>
  30#include <linux/netfilter_bridge.h>
  31#include <linux/netfilter_ipv4.h>
  32#include <linux/inetdevice.h>
  33#include <linux/list.h>
  34#include <linux/openvswitch.h>
  35#include <linux/rculist.h>
  36#include <linux/dmi.h>
  37#include <net/genetlink.h>
  38#include <net/net_namespace.h>
  39#include <net/netns/generic.h>
  40
  41#include "datapath.h"
  42#include "flow.h"
  43#include "flow_table.h"
  44#include "flow_netlink.h"
  45#include "meter.h"
  46#include "vport-internal_dev.h"
  47#include "vport-netdev.h"
  48
/* NOTE(review): presumably the per-network-namespace (net_generic) id of
 * this module; the place where it is assigned is not visible here -- confirm.
 */
unsigned int ovs_net_id __read_mostly;

/* Forward declarations of the generic netlink families so that code placed
 * before their definitions can reference them.
 */
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

/* Forward declaration; the array is defined elsewhere in this file. */
static const struct nla_policy flow_policy[];

/* Generic netlink multicast group named OVS_FLOW_MCGROUP. */
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

/* Generic netlink multicast group named OVS_DATAPATH_MCGROUP. */
static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

/* Generic netlink multicast group named OVS_VPORT_MCGROUP. */
static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};
  68
  69/* Check if need to build a reply message.
  70 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
  71static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
  72                            unsigned int group)
  73{
  74        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
  75               genl_has_listeners(family, genl_info_net(info), group);
  76}
  77
/* Hand @skb to genl_notify() for @family, using the request context in
 * @info, group index 0 and a GFP_KERNEL allocation context.
 */
static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}
  83
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_mutex, which is taken with ovs_lock().
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
 100
/* The mutex that ovs_lock()/ovs_unlock() operate on. */
static DEFINE_MUTEX(ovs_mutex);

/* Acquire ovs_mutex. */
void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}
 107
/* Release ovs_mutex. */
void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}
 112
 113#ifdef CONFIG_LOCKDEP
 114int lockdep_ovsl_is_held(void)
 115{
 116        if (debug_locks)
 117                return lockdep_is_held(&ovs_mutex);
 118        else
 119                return 1;
 120}
 121#endif
 122
/* Forward declarations of helpers that are defined further down in this
 * file but used before their definitions.
 */
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);
 132
 133/* Must be called with rcu_read_lock or ovs_mutex. */
 134const char *ovs_dp_name(const struct datapath *dp)
 135{
 136        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
 137        return ovs_vport_name(vport);
 138}
 139
 140static int get_dpifindex(const struct datapath *dp)
 141{
 142        struct vport *local;
 143        int ifindex;
 144
 145        rcu_read_lock();
 146
 147        local = ovs_vport_rcu(dp, OVSP_LOCAL);
 148        if (local)
 149                ifindex = local->dev->ifindex;
 150        else
 151                ifindex = 0;
 152
 153        rcu_read_unlock();
 154
 155        return ifindex;
 156}
 157
 158static void destroy_dp_rcu(struct rcu_head *rcu)
 159{
 160        struct datapath *dp = container_of(rcu, struct datapath, rcu);
 161
 162        ovs_flow_tbl_destroy(&dp->table);
 163        free_percpu(dp->stats_percpu);
 164        kfree(dp->ports);
 165        ovs_meters_exit(dp);
 166        kfree(dp);
 167}
 168
 169static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
 170                                            u16 port_no)
 171{
 172        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
 173}
 174
 175/* Called with ovs_mutex or RCU read lock. */
 176struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 177{
 178        struct vport *vport;
 179        struct hlist_head *head;
 180
 181        head = vport_hash_bucket(dp, port_no);
 182        hlist_for_each_entry_rcu(vport, head, dp_hash_node,
 183                                lockdep_ovsl_is_held()) {
 184                if (vport->port_no == port_no)
 185                        return vport;
 186        }
 187        return NULL;
 188}
 189
 190/* Called with ovs_mutex. */
 191static struct vport *new_vport(const struct vport_parms *parms)
 192{
 193        struct vport *vport;
 194
 195        vport = ovs_vport_add(parms);
 196        if (!IS_ERR(vport)) {
 197                struct datapath *dp = parms->dp;
 198                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
 199
 200                hlist_add_head_rcu(&vport->dp_hash_node, head);
 201        }
 202        return vport;
 203}
 204
/* Remove vport @p from its datapath's port hash and delete it.
 *
 * ASSERT_OVSL() checks the locking precondition before anything is touched.
 */
void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}
 215
 216/* Must be called with rcu_read_lock. */
 217void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 218{
 219        const struct vport *p = OVS_CB(skb)->input_vport;
 220        struct datapath *dp = p->dp;
 221        struct sw_flow *flow;
 222        struct sw_flow_actions *sf_acts;
 223        struct dp_stats_percpu *stats;
 224        u64 *stats_counter;
 225        u32 n_mask_hit;
 226        int error;
 227
 228        stats = this_cpu_ptr(dp->stats_percpu);
 229
 230        /* Look up flow. */
 231        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
 232                                         &n_mask_hit);
 233        if (unlikely(!flow)) {
 234                struct dp_upcall_info upcall;
 235
 236                memset(&upcall, 0, sizeof(upcall));
 237                upcall.cmd = OVS_PACKET_CMD_MISS;
 238                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
 239                upcall.mru = OVS_CB(skb)->mru;
 240                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
 241                if (unlikely(error))
 242                        kfree_skb(skb);
 243                else
 244                        consume_skb(skb);
 245                stats_counter = &stats->n_missed;
 246                goto out;
 247        }
 248
 249        ovs_flow_stats_update(flow, key->tp.flags, skb);
 250        sf_acts = rcu_dereference(flow->sf_acts);
 251        error = ovs_execute_actions(dp, skb, sf_acts, key);
 252        if (unlikely(error))
 253                net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
 254                                                        ovs_dp_name(dp), error);
 255
 256        stats_counter = &stats->n_hit;
 257
 258out:
 259        /* Update datapath statistics. */
 260        u64_stats_update_begin(&stats->syncp);
 261        (*stats_counter)++;
 262        stats->n_mask_hit += n_mask_hit;
 263        u64_stats_update_end(&stats->syncp);
 264}
 265
 266int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 267                  const struct sw_flow_key *key,
 268                  const struct dp_upcall_info *upcall_info,
 269                  uint32_t cutlen)
 270{
 271        struct dp_stats_percpu *stats;
 272        int err;
 273
 274        if (upcall_info->portid == 0) {
 275                err = -ENOTCONN;
 276                goto err;
 277        }
 278
 279        if (!skb_is_gso(skb))
 280                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
 281        else
 282                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
 283        if (err)
 284                goto err;
 285
 286        return 0;
 287
 288err:
 289        stats = this_cpu_ptr(dp->stats_percpu);
 290
 291        u64_stats_update_begin(&stats->syncp);
 292        stats->n_lost++;
 293        u64_stats_update_end(&stats->syncp);
 294
 295        return err;
 296}
 297
 298static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 299                             const struct sw_flow_key *key,
 300                             const struct dp_upcall_info *upcall_info,
 301                                 uint32_t cutlen)
 302{
 303        unsigned int gso_type = skb_shinfo(skb)->gso_type;
 304        struct sw_flow_key later_key;
 305        struct sk_buff *segs, *nskb;
 306        int err;
 307
 308        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
 309        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
 310        if (IS_ERR(segs))
 311                return PTR_ERR(segs);
 312        if (segs == NULL)
 313                return -EINVAL;
 314
 315        if (gso_type & SKB_GSO_UDP) {
 316                /* The initial flow key extracted by ovs_flow_key_extract()
 317                 * in this case is for a first fragment, so we need to
 318                 * properly mark later fragments.
 319                 */
 320                later_key = *key;
 321                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
 322        }
 323
 324        /* Queue all of the segments. */
 325        skb_list_walk_safe(segs, skb, nskb) {
 326                if (gso_type & SKB_GSO_UDP && skb != segs)
 327                        key = &later_key;
 328
 329                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
 330                if (err)
 331                        break;
 332
 333        }
 334
 335        /* Free all of the segments. */
 336        skb_list_walk_safe(segs, skb, nskb) {
 337                if (err)
 338                        kfree_skb(skb);
 339                else
 340                        consume_skb(skb);
 341        }
 342        return err;
 343}
 344
 345static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
 346                              unsigned int hdrlen, int actions_attrlen)
 347{
 348        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
 349                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
 350                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
 351                + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
 352                + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
 353
 354        /* OVS_PACKET_ATTR_USERDATA */
 355        if (upcall_info->userdata)
 356                size += NLA_ALIGN(upcall_info->userdata->nla_len);
 357
 358        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
 359        if (upcall_info->egress_tun_info)
 360                size += nla_total_size(ovs_tun_key_attr_size());
 361
 362        /* OVS_PACKET_ATTR_ACTIONS */
 363        if (upcall_info->actions_len)
 364                size += nla_total_size(actions_attrlen);
 365
 366        /* OVS_PACKET_ATTR_MRU */
 367        if (upcall_info->mru)
 368                size += nla_total_size(sizeof(upcall_info->mru));
 369
 370        return size;
 371}
 372
 373static void pad_packet(struct datapath *dp, struct sk_buff *skb)
 374{
 375        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
 376                size_t plen = NLA_ALIGN(skb->len) - skb->len;
 377
 378                if (plen > 0)
 379                        skb_put_zero(skb, plen);
 380        }
 381}
 382
/* Build one upcall netlink message for @skb and unicast it to
 * upcall_info->portid.
 *
 * @dp:          datapath the packet belongs to; its ifindex goes into the
 *               message header
 * @skb:         packet to report; when it carries a VLAN tag a clone with
 *               the tag pushed into the data is used instead
 * @key:         flow key emitted as OVS_PACKET_ATTR_KEY
 * @upcall_info: command, destination portid and optional attributes
 *               (userdata, egress tunnel info, actions, MRU)
 * @cutlen:      number of bytes trimmed from the end of the packet data;
 *               when non-zero the untrimmed length is reported through
 *               OVS_PACKET_ATTR_LEN
 *
 * Returns the result of genlmsg_unicast() when the message was built, or a
 * negative error code: -ENODEV when the datapath has no ifindex, -ENOMEM
 * on allocation failure, -EFBIG when the packet does not fit in a netlink
 * attribute, -EINVAL/-EMSGSIZE/-ENOBUFS when the message cannot be
 * assembled, or the error of a helper that failed.
 */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;
        u64 hash;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        /* Work on a clone with the VLAN tag pushed into the packet data;
         * from here on 'skb' refers to that clone, which is released
         * through 'nskb' at the end.
         */
        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        /* The packet has to fit into a single netlink attribute. */
        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_csum_hwoffload_help(skb, 0)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        if (err)
                goto out;

        /* No result is checked here; room for the userdata was included by
         * upcall_msg_size() above.
         */
        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start_noflag(user_skb,
                                            OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                if (err)
                        goto out;

                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                /* A failure to emit the actions does not abort the upcall:
                 * the nest is cancelled and message construction goes on.
                 */
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru &&
            nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0 &&
            nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_HASH */
        hash = skb_get_hash_raw(skb);
        if (skb->sw_hash)
                hash |= OVS_PACKET_HASH_SW_BIT;

        if (skb->l4_hash)
                hash |= OVS_PACKET_HASH_L4_BIT;

        if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        /* Fix up the attribute length to cover the (possibly trimmed) data. */
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        /* The message length is written by hand because the packet data
         * was appended after the netlink header had been put.
         */
        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        /* Cleared so that the cleanup below leaves the message alone.
         * NOTE(review): presumably genlmsg_unicast() takes ownership of it
         * even on failure -- confirm.
         */
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}
 540
/* Handler for OVS_PACKET_CMD_EXECUTE: run a list of actions supplied by
 * userspace on a packet supplied by userspace.
 *
 * @skb:  the netlink request; only used to find the network namespace
 * @info: parsed request; OVS_PACKET_ATTR_PACKET, OVS_PACKET_ATTR_KEY and
 *        OVS_PACKET_ATTR_ACTIONS are mandatory, OVS_PACKET_ATTR_MRU,
 *        OVS_PACKET_ATTR_HASH and OVS_PACKET_ATTR_PROBE are optional
 *
 * Returns the result of ovs_execute_actions(), or a negative error code:
 * -EINVAL when a mandatory attribute is missing, -ENOMEM when the packet
 * buffer cannot be allocated, -ENODEV when the datapath or an input vport
 * cannot be found, or the error of a helper that failed.
 */
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        u64 hash;
        int len;
        int err;
        /* Passed to the parsing helpers as their 'log' argument; false when
         * the request carries OVS_PACKET_ATTR_PROBE.
         */
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        /* Copy the packet payload out of the request into a fresh skb. */
        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        /* The low 32 bits carry the hash value; the SW and L4 flag bits are
         * turned into the corresponding boolean arguments.
         */
        if (a[OVS_PACKET_ATTR_HASH]) {
                hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

                __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
                               !!(hash & OVS_PACKET_HASH_SW_BIT),
                               !!(hash & OVS_PACKET_HASH_L4_BIT));
        }

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        /* Fall back to the local port when the key's in_port is unknown. */
        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        /* The actions run with bottom halves disabled. */
        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        /* 'packet' is not freed on this path; it was handed to
         * ovs_execute_actions() above.
         */
        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}
 641
/* Netlink attribute policy of the packet family (see .policy of
 * dp_packet_genl_family).
 */
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        /* NOTE(review): no .type is given, so .len is presumably enforced as
         * a minimum payload length of ETH_HLEN -- confirm against the
         * nla_policy documentation.
         */
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
        [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
 650
/* Operations of the packet family: OVS_PACKET_CMD_EXECUTE is the only
 * command and is served by ovs_packet_cmd_execute().
 */
static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_packet_cmd_execute
        }
};
 658
/* Generic netlink family OVS_PACKET_FAMILY.  Messages carry a struct
 * ovs_header as family header, attributes are checked against
 * packet_policy, and the operations come from dp_packet_genl_ops.
 */
static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .policy = packet_policy,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .module = THIS_MODULE,
};
 671
 672static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
 673                         struct ovs_dp_megaflow_stats *mega_stats)
 674{
 675        int i;
 676
 677        memset(mega_stats, 0, sizeof(*mega_stats));
 678
 679        stats->n_flows = ovs_flow_tbl_count(&dp->table);
 680        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
 681
 682        stats->n_hit = stats->n_missed = stats->n_lost = 0;
 683
 684        for_each_possible_cpu(i) {
 685                const struct dp_stats_percpu *percpu_stats;
 686                struct dp_stats_percpu local_stats;
 687                unsigned int start;
 688
 689                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
 690
 691                do {
 692                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
 693                        local_stats = *percpu_stats;
 694                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
 695
 696                stats->n_hit += local_stats.n_hit;
 697                stats->n_missed += local_stats.n_missed;
 698                stats->n_lost += local_stats.n_lost;
 699                mega_stats->n_mask_hit += local_stats.n_mask_hit;
 700        }
 701}
 702
 703static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
 704{
 705        return ovs_identifier_is_ufid(sfid) &&
 706               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
 707}
 708
 709static bool should_fill_mask(uint32_t ufid_flags)
 710{
 711        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
 712}
 713
 714static bool should_fill_actions(uint32_t ufid_flags)
 715{
 716        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
 717}
 718
 719static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
 720                                    const struct sw_flow_id *sfid,
 721                                    uint32_t ufid_flags)
 722{
 723        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
 724
 725        /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
 726         * see ovs_nla_put_identifier()
 727         */
 728        if (sfid && ovs_identifier_is_ufid(sfid))
 729                len += nla_total_size(sfid->ufid_len);
 730        else
 731                len += nla_total_size(ovs_key_attr_size());
 732
 733        /* OVS_FLOW_ATTR_KEY */
 734        if (!sfid || should_fill_key(sfid, ufid_flags))
 735                len += nla_total_size(ovs_key_attr_size());
 736
 737        /* OVS_FLOW_ATTR_MASK */
 738        if (should_fill_mask(ufid_flags))
 739                len += nla_total_size(ovs_key_attr_size());
 740
 741        /* OVS_FLOW_ATTR_ACTIONS */
 742        if (should_fill_actions(ufid_flags))
 743                len += nla_total_size(acts->orig_len);
 744
 745        return len
 746                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 747                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
 748                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
 749}
 750
 751/* Called with ovs_mutex or RCU read lock. */
 752static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
 753                                   struct sk_buff *skb)
 754{
 755        struct ovs_flow_stats stats;
 756        __be16 tcp_flags;
 757        unsigned long used;
 758
 759        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
 760
 761        if (used &&
 762            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
 763                              OVS_FLOW_ATTR_PAD))
 764                return -EMSGSIZE;
 765
 766        if (stats.n_packets &&
 767            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
 768                          sizeof(struct ovs_flow_stats), &stats,
 769                          OVS_FLOW_ATTR_PAD))
 770                return -EMSGSIZE;
 771
 772        if ((u8)ntohs(tcp_flags) &&
 773             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
 774                return -EMSGSIZE;
 775
 776        return 0;
 777}
 778
 779/* Called with ovs_mutex or RCU read lock. */
 780static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 781                                     struct sk_buff *skb, int skb_orig_len)
 782{
 783        struct nlattr *start;
 784        int err;
 785
 786        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
 787         * this is the first flow to be dumped into 'skb'.  This is unusual for
 788         * Netlink but individual action lists can be longer than
 789         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
 790         * The userspace caller can always fetch the actions separately if it
 791         * really wants them.  (Most userspace callers in fact don't care.)
 792         *
 793         * This can only fail for dump operations because the skb is always
 794         * properly sized for single flows.
 795         */
 796        start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
 797        if (start) {
 798                const struct sw_flow_actions *sf_acts;
 799
 800                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
 801                err = ovs_nla_put_actions(sf_acts->actions,
 802                                          sf_acts->actions_len, skb);
 803
 804                if (!err)
 805                        nla_nest_end(skb, start);
 806                else {
 807                        if (skb_orig_len)
 808                                return err;
 809
 810                        nla_nest_cancel(skb, start);
 811                }
 812        } else if (skb_orig_len) {
 813                return -EMSGSIZE;
 814        }
 815
 816        return 0;
 817}
 818
 819/* Called with ovs_mutex or RCU read lock. */
 820static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 821                                  struct sk_buff *skb, u32 portid,
 822                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
 823{
 824        const int skb_orig_len = skb->len;
 825        struct ovs_header *ovs_header;
 826        int err;
 827
 828        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
 829                                 flags, cmd);
 830        if (!ovs_header)
 831                return -EMSGSIZE;
 832
 833        ovs_header->dp_ifindex = dp_ifindex;
 834
 835        err = ovs_nla_put_identifier(flow, skb);
 836        if (err)
 837                goto error;
 838
 839        if (should_fill_key(&flow->id, ufid_flags)) {
 840                err = ovs_nla_put_masked_key(flow, skb);
 841                if (err)
 842                        goto error;
 843        }
 844
 845        if (should_fill_mask(ufid_flags)) {
 846                err = ovs_nla_put_mask(flow, skb);
 847                if (err)
 848                        goto error;
 849        }
 850
 851        err = ovs_flow_cmd_fill_stats(flow, skb);
 852        if (err)
 853                goto error;
 854
 855        if (should_fill_actions(ufid_flags)) {
 856                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
 857                if (err)
 858                        goto error;
 859        }
 860
 861        genlmsg_end(skb, ovs_header);
 862        return 0;
 863
 864error:
 865        genlmsg_cancel(skb, ovs_header);
 866        return err;
 867}
 868
 869/* May not be called with RCU read lock. */
 870static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
 871                                               const struct sw_flow_id *sfid,
 872                                               struct genl_info *info,
 873                                               bool always,
 874                                               uint32_t ufid_flags)
 875{
 876        struct sk_buff *skb;
 877        size_t len;
 878
 879        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
 880                return NULL;
 881
 882        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
 883        skb = genlmsg_new(len, GFP_KERNEL);
 884        if (!skb)
 885                return ERR_PTR(-ENOMEM);
 886
 887        return skb;
 888}
 889
 890/* Called with ovs_mutex. */
 891static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
 892                                               int dp_ifindex,
 893                                               struct genl_info *info, u8 cmd,
 894                                               bool always, u32 ufid_flags)
 895{
 896        struct sk_buff *skb;
 897        int retval;
 898
 899        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
 900                                      &flow->id, info, always, ufid_flags);
 901        if (IS_ERR_OR_NULL(skb))
 902                return skb;
 903
 904        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
 905                                        info->snd_portid, info->snd_seq, 0,
 906                                        cmd, ufid_flags);
 907        if (WARN_ON_ONCE(retval < 0)) {
 908                kfree_skb(skb);
 909                skb = ERR_PTR(retval);
 910        }
 911        return skb;
 912}
 913
 914static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 915{
 916        struct net *net = sock_net(skb->sk);
 917        struct nlattr **a = info->attrs;
 918        struct ovs_header *ovs_header = info->userhdr;
 919        struct sw_flow *flow = NULL, *new_flow;
 920        struct sw_flow_mask mask;
 921        struct sk_buff *reply;
 922        struct datapath *dp;
 923        struct sw_flow_actions *acts;
 924        struct sw_flow_match match;
 925        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 926        int error;
 927        bool log = !a[OVS_FLOW_ATTR_PROBE];
 928
 929        /* Must have key and actions. */
 930        error = -EINVAL;
 931        if (!a[OVS_FLOW_ATTR_KEY]) {
 932                OVS_NLERR(log, "Flow key attr not present in new flow.");
 933                goto error;
 934        }
 935        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
 936                OVS_NLERR(log, "Flow actions attr not present in new flow.");
 937                goto error;
 938        }
 939
 940        /* Most of the time we need to allocate a new flow, do it before
 941         * locking.
 942         */
 943        new_flow = ovs_flow_alloc();
 944        if (IS_ERR(new_flow)) {
 945                error = PTR_ERR(new_flow);
 946                goto error;
 947        }
 948
 949        /* Extract key. */
 950        ovs_match_init(&match, &new_flow->key, false, &mask);
 951        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
 952                                  a[OVS_FLOW_ATTR_MASK], log);
 953        if (error)
 954                goto err_kfree_flow;
 955
 956        /* Extract flow identifier. */
 957        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
 958                                       &new_flow->key, log);
 959        if (error)
 960                goto err_kfree_flow;
 961
 962        /* unmasked key is needed to match when ufid is not used. */
 963        if (ovs_identifier_is_key(&new_flow->id))
 964                match.key = new_flow->id.unmasked_key;
 965
 966        ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
 967
 968        /* Validate actions. */
 969        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
 970                                     &new_flow->key, &acts, log);
 971        if (error) {
 972                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
 973                goto err_kfree_flow;
 974        }
 975
 976        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
 977                                        ufid_flags);
 978        if (IS_ERR(reply)) {
 979                error = PTR_ERR(reply);
 980                goto err_kfree_acts;
 981        }
 982
 983        ovs_lock();
 984        dp = get_dp(net, ovs_header->dp_ifindex);
 985        if (unlikely(!dp)) {
 986                error = -ENODEV;
 987                goto err_unlock_ovs;
 988        }
 989
 990        /* Check if this is a duplicate flow */
 991        if (ovs_identifier_is_ufid(&new_flow->id))
 992                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
 993        if (!flow)
 994                flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
 995        if (likely(!flow)) {
 996                rcu_assign_pointer(new_flow->sf_acts, acts);
 997
 998                /* Put flow in bucket. */
 999                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1000                if (unlikely(error)) {
1001                        acts = NULL;
1002                        goto err_unlock_ovs;
1003                }
1004
1005                if (unlikely(reply)) {
1006                        error = ovs_flow_cmd_fill_info(new_flow,
1007                                                       ovs_header->dp_ifindex,
1008                                                       reply, info->snd_portid,
1009                                                       info->snd_seq, 0,
1010                                                       OVS_FLOW_CMD_NEW,
1011                                                       ufid_flags);
1012                        BUG_ON(error < 0);
1013                }
1014                ovs_unlock();
1015        } else {
1016                struct sw_flow_actions *old_acts;
1017
1018                /* Bail out if we're not allowed to modify an existing flow.
1019                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1020                 * because Generic Netlink treats the latter as a dump
1021                 * request.  We also accept NLM_F_EXCL in case that bug ever
1022                 * gets fixed.
1023                 */
1024                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1025                                                         | NLM_F_EXCL))) {
1026                        error = -EEXIST;
1027                        goto err_unlock_ovs;
1028                }
1029                /* The flow identifier has to be the same for flow updates.
1030                 * Look for any overlapping flow.
1031                 */
1032                if (unlikely(!ovs_flow_cmp(flow, &match))) {
1033                        if (ovs_identifier_is_key(&flow->id))
1034                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
1035                                                                 &match);
1036                        else /* UFID matches but key is different */
1037                                flow = NULL;
1038                        if (!flow) {
1039                                error = -ENOENT;
1040                                goto err_unlock_ovs;
1041                        }
1042                }
1043                /* Update actions. */
1044                old_acts = ovsl_dereference(flow->sf_acts);
1045                rcu_assign_pointer(flow->sf_acts, acts);
1046
1047                if (unlikely(reply)) {
1048                        error = ovs_flow_cmd_fill_info(flow,
1049                                                       ovs_header->dp_ifindex,
1050                                                       reply, info->snd_portid,
1051                                                       info->snd_seq, 0,
1052                                                       OVS_FLOW_CMD_NEW,
1053                                                       ufid_flags);
1054                        BUG_ON(error < 0);
1055                }
1056                ovs_unlock();
1057
1058                ovs_nla_free_flow_actions_rcu(old_acts);
1059                ovs_flow_free(new_flow, false);
1060        }
1061
1062        if (reply)
1063                ovs_notify(&dp_flow_genl_family, reply, info);
1064        return 0;
1065
1066err_unlock_ovs:
1067        ovs_unlock();
1068        kfree_skb(reply);
1069err_kfree_acts:
1070        ovs_nla_free_flow_actions(acts);
1071err_kfree_flow:
1072        ovs_flow_free(new_flow, false);
1073error:
1074        return error;
1075}
1076
1077/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1078static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
1079                                                const struct nlattr *a,
1080                                                const struct sw_flow_key *key,
1081                                                const struct sw_flow_mask *mask,
1082                                                bool log)
1083{
1084        struct sw_flow_actions *acts;
1085        struct sw_flow_key masked_key;
1086        int error;
1087
1088        ovs_flow_mask_key(&masked_key, key, true, mask);
1089        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1090        if (error) {
1091                OVS_NLERR(log,
1092                          "Actions may not be safe on all matching packets");
1093                return ERR_PTR(error);
1094        }
1095
1096        return acts;
1097}
1098
1099/* Factor out match-init and action-copy to avoid
1100 * "Wframe-larger-than=1024" warning. Because mask is only
1101 * used to get actions, we new a function to save some
1102 * stack space.
1103 *
1104 * If there are not key and action attrs, we return 0
1105 * directly. In the case, the caller will also not use the
1106 * match as before. If there is action attr, we try to get
1107 * actions and save them to *acts. Before returning from
1108 * the function, we reset the match->mask pointer. Because
1109 * we should not to return match object with dangling reference
1110 * to mask.
1111 * */
1112static noinline_for_stack int
1113ovs_nla_init_match_and_action(struct net *net,
1114                              struct sw_flow_match *match,
1115                              struct sw_flow_key *key,
1116                              struct nlattr **a,
1117                              struct sw_flow_actions **acts,
1118                              bool log)
1119{
1120        struct sw_flow_mask mask;
1121        int error = 0;
1122
1123        if (a[OVS_FLOW_ATTR_KEY]) {
1124                ovs_match_init(match, key, true, &mask);
1125                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1126                                          a[OVS_FLOW_ATTR_MASK], log);
1127                if (error)
1128                        goto error;
1129        }
1130
1131        if (a[OVS_FLOW_ATTR_ACTIONS]) {
1132                if (!a[OVS_FLOW_ATTR_KEY]) {
1133                        OVS_NLERR(log,
1134                                  "Flow key attribute not present in set flow.");
1135                        error = -EINVAL;
1136                        goto error;
1137                }
1138
1139                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1140                                         &mask, log);
1141                if (IS_ERR(*acts)) {
1142                        error = PTR_ERR(*acts);
1143                        goto error;
1144                }
1145        }
1146
1147        /* On success, error is 0. */
1148error:
1149        match->mask = NULL;
1150        return error;
1151}
1152
1153static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1154{
1155        struct net *net = sock_net(skb->sk);
1156        struct nlattr **a = info->attrs;
1157        struct ovs_header *ovs_header = info->userhdr;
1158        struct sw_flow_key key;
1159        struct sw_flow *flow;
1160        struct sk_buff *reply = NULL;
1161        struct datapath *dp;
1162        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1163        struct sw_flow_match match;
1164        struct sw_flow_id sfid;
1165        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1166        int error = 0;
1167        bool log = !a[OVS_FLOW_ATTR_PROBE];
1168        bool ufid_present;
1169
1170        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1171        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1172                OVS_NLERR(log,
1173                          "Flow set message rejected, Key attribute missing.");
1174                return -EINVAL;
1175        }
1176
1177        error = ovs_nla_init_match_and_action(net, &match, &key, a,
1178                                              &acts, log);
1179        if (error)
1180                goto error;
1181
1182        if (acts) {
1183                /* Can allocate before locking if have acts. */
1184                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1185                                                ufid_flags);
1186                if (IS_ERR(reply)) {
1187                        error = PTR_ERR(reply);
1188                        goto err_kfree_acts;
1189                }
1190        }
1191
1192        ovs_lock();
1193        dp = get_dp(net, ovs_header->dp_ifindex);
1194        if (unlikely(!dp)) {
1195                error = -ENODEV;
1196                goto err_unlock_ovs;
1197        }
1198        /* Check that the flow exists. */
1199        if (ufid_present)
1200                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1201        else
1202                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1203        if (unlikely(!flow)) {
1204                error = -ENOENT;
1205                goto err_unlock_ovs;
1206        }
1207
1208        /* Update actions, if present. */
1209        if (likely(acts)) {
1210                old_acts = ovsl_dereference(flow->sf_acts);
1211                rcu_assign_pointer(flow->sf_acts, acts);
1212
1213                if (unlikely(reply)) {
1214                        error = ovs_flow_cmd_fill_info(flow,
1215                                                       ovs_header->dp_ifindex,
1216                                                       reply, info->snd_portid,
1217                                                       info->snd_seq, 0,
1218                                                       OVS_FLOW_CMD_SET,
1219                                                       ufid_flags);
1220                        BUG_ON(error < 0);
1221                }
1222        } else {
1223                /* Could not alloc without acts before locking. */
1224                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1225                                                info, OVS_FLOW_CMD_SET, false,
1226                                                ufid_flags);
1227
1228                if (IS_ERR(reply)) {
1229                        error = PTR_ERR(reply);
1230                        goto err_unlock_ovs;
1231                }
1232        }
1233
1234        /* Clear stats. */
1235        if (a[OVS_FLOW_ATTR_CLEAR])
1236                ovs_flow_stats_clear(flow);
1237        ovs_unlock();
1238
1239        if (reply)
1240                ovs_notify(&dp_flow_genl_family, reply, info);
1241        if (old_acts)
1242                ovs_nla_free_flow_actions_rcu(old_acts);
1243
1244        return 0;
1245
1246err_unlock_ovs:
1247        ovs_unlock();
1248        kfree_skb(reply);
1249err_kfree_acts:
1250        ovs_nla_free_flow_actions(acts);
1251error:
1252        return error;
1253}
1254
1255static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1256{
1257        struct nlattr **a = info->attrs;
1258        struct ovs_header *ovs_header = info->userhdr;
1259        struct net *net = sock_net(skb->sk);
1260        struct sw_flow_key key;
1261        struct sk_buff *reply;
1262        struct sw_flow *flow;
1263        struct datapath *dp;
1264        struct sw_flow_match match;
1265        struct sw_flow_id ufid;
1266        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1267        int err = 0;
1268        bool log = !a[OVS_FLOW_ATTR_PROBE];
1269        bool ufid_present;
1270
1271        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1272        if (a[OVS_FLOW_ATTR_KEY]) {
1273                ovs_match_init(&match, &key, true, NULL);
1274                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1275                                        log);
1276        } else if (!ufid_present) {
1277                OVS_NLERR(log,
1278                          "Flow get message rejected, Key attribute missing.");
1279                err = -EINVAL;
1280        }
1281        if (err)
1282                return err;
1283
1284        ovs_lock();
1285        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1286        if (!dp) {
1287                err = -ENODEV;
1288                goto unlock;
1289        }
1290
1291        if (ufid_present)
1292                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1293        else
1294                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1295        if (!flow) {
1296                err = -ENOENT;
1297                goto unlock;
1298        }
1299
1300        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1301                                        OVS_FLOW_CMD_GET, true, ufid_flags);
1302        if (IS_ERR(reply)) {
1303                err = PTR_ERR(reply);
1304                goto unlock;
1305        }
1306
1307        ovs_unlock();
1308        return genlmsg_reply(reply, info);
1309unlock:
1310        ovs_unlock();
1311        return err;
1312}
1313
1314static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1315{
1316        struct nlattr **a = info->attrs;
1317        struct ovs_header *ovs_header = info->userhdr;
1318        struct net *net = sock_net(skb->sk);
1319        struct sw_flow_key key;
1320        struct sk_buff *reply;
1321        struct sw_flow *flow = NULL;
1322        struct datapath *dp;
1323        struct sw_flow_match match;
1324        struct sw_flow_id ufid;
1325        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1326        int err;
1327        bool log = !a[OVS_FLOW_ATTR_PROBE];
1328        bool ufid_present;
1329
1330        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1331        if (a[OVS_FLOW_ATTR_KEY]) {
1332                ovs_match_init(&match, &key, true, NULL);
1333                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1334                                        NULL, log);
1335                if (unlikely(err))
1336                        return err;
1337        }
1338
1339        ovs_lock();
1340        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1341        if (unlikely(!dp)) {
1342                err = -ENODEV;
1343                goto unlock;
1344        }
1345
1346        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1347                err = ovs_flow_tbl_flush(&dp->table);
1348                goto unlock;
1349        }
1350
1351        if (ufid_present)
1352                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1353        else
1354                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1355        if (unlikely(!flow)) {
1356                err = -ENOENT;
1357                goto unlock;
1358        }
1359
1360        ovs_flow_tbl_remove(&dp->table, flow);
1361        ovs_unlock();
1362
1363        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1364                                        &flow->id, info, false, ufid_flags);
1365        if (likely(reply)) {
1366                if (!IS_ERR(reply)) {
1367                        rcu_read_lock();        /*To keep RCU checker happy. */
1368                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1369                                                     reply, info->snd_portid,
1370                                                     info->snd_seq, 0,
1371                                                     OVS_FLOW_CMD_DEL,
1372                                                     ufid_flags);
1373                        rcu_read_unlock();
1374                        if (WARN_ON_ONCE(err < 0)) {
1375                                kfree_skb(reply);
1376                                goto out_free;
1377                        }
1378
1379                        ovs_notify(&dp_flow_genl_family, reply, info);
1380                } else {
1381                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
1382                }
1383        }
1384
1385out_free:
1386        ovs_flow_free(flow, true);
1387        return 0;
1388unlock:
1389        ovs_unlock();
1390        return err;
1391}
1392
1393static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1394{
1395        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1396        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1397        struct table_instance *ti;
1398        struct datapath *dp;
1399        u32 ufid_flags;
1400        int err;
1401
1402        err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1403                                       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1404        if (err)
1405                return err;
1406        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1407
1408        rcu_read_lock();
1409        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1410        if (!dp) {
1411                rcu_read_unlock();
1412                return -ENODEV;
1413        }
1414
1415        ti = rcu_dereference(dp->table.ti);
1416        for (;;) {
1417                struct sw_flow *flow;
1418                u32 bucket, obj;
1419
1420                bucket = cb->args[0];
1421                obj = cb->args[1];
1422                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1423                if (!flow)
1424                        break;
1425
1426                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1427                                           NETLINK_CB(cb->skb).portid,
1428                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
1429                                           OVS_FLOW_CMD_GET, ufid_flags) < 0)
1430                        break;
1431
1432                cb->args[0] = bucket;
1433                cb->args[1] = obj;
1434        }
1435        rcu_read_unlock();
1436        return skb->len;
1437}
1438
1439static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1440        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1441        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1442        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1443        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1444        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1445        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1446        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1447};
1448
1449static const struct genl_ops dp_flow_genl_ops[] = {
1450        { .cmd = OVS_FLOW_CMD_NEW,
1451          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1452          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1453          .doit = ovs_flow_cmd_new
1454        },
1455        { .cmd = OVS_FLOW_CMD_DEL,
1456          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1457          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1458          .doit = ovs_flow_cmd_del
1459        },
1460        { .cmd = OVS_FLOW_CMD_GET,
1461          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1462          .flags = 0,               /* OK for unprivileged users. */
1463          .doit = ovs_flow_cmd_get,
1464          .dumpit = ovs_flow_cmd_dump
1465        },
1466        { .cmd = OVS_FLOW_CMD_SET,
1467          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1468          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1469          .doit = ovs_flow_cmd_set,
1470        },
1471};
1472
1473static struct genl_family dp_flow_genl_family __ro_after_init = {
1474        .hdrsize = sizeof(struct ovs_header),
1475        .name = OVS_FLOW_FAMILY,
1476        .version = OVS_FLOW_VERSION,
1477        .maxattr = OVS_FLOW_ATTR_MAX,
1478        .policy = flow_policy,
1479        .netnsok = true,
1480        .parallel_ops = true,
1481        .ops = dp_flow_genl_ops,
1482        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1483        .mcgrps = &ovs_dp_flow_multicast_group,
1484        .n_mcgrps = 1,
1485        .module = THIS_MODULE,
1486};
1487
1488static size_t ovs_dp_cmd_msg_size(void)
1489{
1490        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1491
1492        msgsize += nla_total_size(IFNAMSIZ);
1493        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1494        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1495        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1496
1497        return msgsize;
1498}
1499
1500/* Called with ovs_mutex. */
1501static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1502                                u32 portid, u32 seq, u32 flags, u8 cmd)
1503{
1504        struct ovs_header *ovs_header;
1505        struct ovs_dp_stats dp_stats;
1506        struct ovs_dp_megaflow_stats dp_megaflow_stats;
1507        int err;
1508
1509        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1510                                   flags, cmd);
1511        if (!ovs_header)
1512                goto error;
1513
1514        ovs_header->dp_ifindex = get_dpifindex(dp);
1515
1516        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1517        if (err)
1518                goto nla_put_failure;
1519
1520        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1521        if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1522                          &dp_stats, OVS_DP_ATTR_PAD))
1523                goto nla_put_failure;
1524
1525        if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1526                          sizeof(struct ovs_dp_megaflow_stats),
1527                          &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1528                goto nla_put_failure;
1529
1530        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1531                goto nla_put_failure;
1532
1533        genlmsg_end(skb, ovs_header);
1534        return 0;
1535
1536nla_put_failure:
1537        genlmsg_cancel(skb, ovs_header);
1538error:
1539        return -EMSGSIZE;
1540}
1541
1542static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1543{
1544        return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1545}
1546
1547/* Called with rcu_read_lock or ovs_mutex. */
1548static struct datapath *lookup_datapath(struct net *net,
1549                                        const struct ovs_header *ovs_header,
1550                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1551{
1552        struct datapath *dp;
1553
1554        if (!a[OVS_DP_ATTR_NAME])
1555                dp = get_dp(net, ovs_header->dp_ifindex);
1556        else {
1557                struct vport *vport;
1558
1559                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1560                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1561        }
1562        return dp ? dp : ERR_PTR(-ENODEV);
1563}
1564
1565static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1566{
1567        struct datapath *dp;
1568
1569        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1570        if (IS_ERR(dp))
1571                return;
1572
1573        WARN(dp->user_features, "Dropping previously announced user features\n");
1574        dp->user_features = 0;
1575}
1576
1577DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
1578
1579static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1580{
1581        u32 user_features = 0;
1582
1583        if (a[OVS_DP_ATTR_USER_FEATURES]) {
1584                user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1585
1586                if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1587                                      OVS_DP_F_UNALIGNED |
1588                                      OVS_DP_F_TC_RECIRC_SHARING))
1589                        return -EOPNOTSUPP;
1590
1591#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1592                if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1593                        return -EOPNOTSUPP;
1594#endif
1595        }
1596
1597        dp->user_features = user_features;
1598
1599        if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1600                static_branch_enable(&tc_recirc_sharing_support);
1601        else
1602                static_branch_disable(&tc_recirc_sharing_support);
1603
1604        return 0;
1605}
1606
1607static int ovs_dp_stats_init(struct datapath *dp)
1608{
1609        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1610        if (!dp->stats_percpu)
1611                return -ENOMEM;
1612
1613        return 0;
1614}
1615
1616static int ovs_dp_vport_init(struct datapath *dp)
1617{
1618        int i;
1619
1620        dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1621                                  sizeof(struct hlist_head),
1622                                  GFP_KERNEL);
1623        if (!dp->ports)
1624                return -ENOMEM;
1625
1626        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1627                INIT_HLIST_HEAD(&dp->ports[i]);
1628
1629        return 0;
1630}
1631
1632static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1633{
1634        struct nlattr **a = info->attrs;
1635        struct vport_parms parms;
1636        struct sk_buff *reply;
1637        struct datapath *dp;
1638        struct vport *vport;
1639        struct ovs_net *ovs_net;
1640        int err;
1641
1642        err = -EINVAL;
1643        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1644                goto err;
1645
1646        reply = ovs_dp_cmd_alloc_info();
1647        if (!reply)
1648                return -ENOMEM;
1649
1650        err = -ENOMEM;
1651        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1652        if (dp == NULL)
1653                goto err_destroy_reply;
1654
1655        ovs_dp_set_net(dp, sock_net(skb->sk));
1656
1657        /* Allocate table. */
1658        err = ovs_flow_tbl_init(&dp->table);
1659        if (err)
1660                goto err_destroy_dp;
1661
1662        err = ovs_dp_stats_init(dp);
1663        if (err)
1664                goto err_destroy_table;
1665
1666        err = ovs_dp_vport_init(dp);
1667        if (err)
1668                goto err_destroy_stats;
1669
1670        err = ovs_meters_init(dp);
1671        if (err)
1672                goto err_destroy_ports;
1673
1674        /* Set up our datapath device. */
1675        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1676        parms.type = OVS_VPORT_TYPE_INTERNAL;
1677        parms.options = NULL;
1678        parms.dp = dp;
1679        parms.port_no = OVSP_LOCAL;
1680        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1681
1682        err = ovs_dp_change(dp, a);
1683        if (err)
1684                goto err_destroy_meters;
1685
1686        /* So far only local changes have been made, now need the lock. */
1687        ovs_lock();
1688
1689        vport = new_vport(&parms);
1690        if (IS_ERR(vport)) {
1691                err = PTR_ERR(vport);
1692                if (err == -EBUSY)
1693                        err = -EEXIST;
1694
1695                if (err == -EEXIST) {
1696                        /* An outdated user space instance that does not understand
1697                         * the concept of user_features has attempted to create a new
1698                         * datapath and is likely to reuse it. Drop all user features.
1699                         */
1700                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1701                                ovs_dp_reset_user_features(skb, info);
1702                }
1703
1704                ovs_unlock();
1705                goto err_destroy_meters;
1706        }
1707
1708        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1709                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1710        BUG_ON(err < 0);
1711
1712        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1713        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1714
1715        ovs_unlock();
1716
1717        ovs_notify(&dp_datapath_genl_family, reply, info);
1718        return 0;
1719
1720err_destroy_meters:
1721        ovs_meters_exit(dp);
1722err_destroy_ports:
1723        kfree(dp->ports);
1724err_destroy_stats:
1725        free_percpu(dp->stats_percpu);
1726err_destroy_table:
1727        ovs_flow_tbl_destroy(&dp->table);
1728err_destroy_dp:
1729        kfree(dp);
1730err_destroy_reply:
1731        kfree_skb(reply);
1732err:
1733        return err;
1734}
1735
1736/* Called with ovs_mutex. */
1737static void __dp_destroy(struct datapath *dp)
1738{
1739        int i;
1740
1741        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1742                struct vport *vport;
1743                struct hlist_node *n;
1744
1745                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1746                        if (vport->port_no != OVSP_LOCAL)
1747                                ovs_dp_detach_port(vport);
1748        }
1749
1750        list_del_rcu(&dp->list_node);
1751
1752        /* OVSP_LOCAL is datapath internal port. We need to make sure that
1753         * all ports in datapath are destroyed first before freeing datapath.
1754         */
1755        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1756
1757        /* RCU destroy the flow table */
1758        call_rcu(&dp->rcu, destroy_dp_rcu);
1759}
1760
1761static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1762{
1763        struct sk_buff *reply;
1764        struct datapath *dp;
1765        int err;
1766
1767        reply = ovs_dp_cmd_alloc_info();
1768        if (!reply)
1769                return -ENOMEM;
1770
1771        ovs_lock();
1772        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1773        err = PTR_ERR(dp);
1774        if (IS_ERR(dp))
1775                goto err_unlock_free;
1776
1777        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1778                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
1779        BUG_ON(err < 0);
1780
1781        __dp_destroy(dp);
1782        ovs_unlock();
1783
1784        ovs_notify(&dp_datapath_genl_family, reply, info);
1785
1786        return 0;
1787
1788err_unlock_free:
1789        ovs_unlock();
1790        kfree_skb(reply);
1791        return err;
1792}
1793
1794static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1795{
1796        struct sk_buff *reply;
1797        struct datapath *dp;
1798        int err;
1799
1800        reply = ovs_dp_cmd_alloc_info();
1801        if (!reply)
1802                return -ENOMEM;
1803
1804        ovs_lock();
1805        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1806        err = PTR_ERR(dp);
1807        if (IS_ERR(dp))
1808                goto err_unlock_free;
1809
1810        err = ovs_dp_change(dp, info->attrs);
1811        if (err)
1812                goto err_unlock_free;
1813
1814        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1815                                   info->snd_seq, 0, OVS_DP_CMD_SET);
1816        BUG_ON(err < 0);
1817
1818        ovs_unlock();
1819        ovs_notify(&dp_datapath_genl_family, reply, info);
1820
1821        return 0;
1822
1823err_unlock_free:
1824        ovs_unlock();
1825        kfree_skb(reply);
1826        return err;
1827}
1828
1829static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1830{
1831        struct sk_buff *reply;
1832        struct datapath *dp;
1833        int err;
1834
1835        reply = ovs_dp_cmd_alloc_info();
1836        if (!reply)
1837                return -ENOMEM;
1838
1839        ovs_lock();
1840        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1841        if (IS_ERR(dp)) {
1842                err = PTR_ERR(dp);
1843                goto err_unlock_free;
1844        }
1845        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1846                                   info->snd_seq, 0, OVS_DP_CMD_GET);
1847        BUG_ON(err < 0);
1848        ovs_unlock();
1849
1850        return genlmsg_reply(reply, info);
1851
1852err_unlock_free:
1853        ovs_unlock();
1854        kfree_skb(reply);
1855        return err;
1856}
1857
1858static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1859{
1860        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1861        struct datapath *dp;
1862        int skip = cb->args[0];
1863        int i = 0;
1864
1865        ovs_lock();
1866        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1867                if (i >= skip &&
1868                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1869                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1870                                         OVS_DP_CMD_GET) < 0)
1871                        break;
1872                i++;
1873        }
1874        ovs_unlock();
1875
1876        cb->args[0] = i;
1877
1878        return skb->len;
1879}
1880
1881static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1882        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1883        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1884        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1885};
1886
1887static const struct genl_ops dp_datapath_genl_ops[] = {
1888        { .cmd = OVS_DP_CMD_NEW,
1889          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1890          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1891          .doit = ovs_dp_cmd_new
1892        },
1893        { .cmd = OVS_DP_CMD_DEL,
1894          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1895          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1896          .doit = ovs_dp_cmd_del
1897        },
1898        { .cmd = OVS_DP_CMD_GET,
1899          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1900          .flags = 0,               /* OK for unprivileged users. */
1901          .doit = ovs_dp_cmd_get,
1902          .dumpit = ovs_dp_cmd_dump
1903        },
1904        { .cmd = OVS_DP_CMD_SET,
1905          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1906          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1907          .doit = ovs_dp_cmd_set,
1908        },
1909};
1910
1911static struct genl_family dp_datapath_genl_family __ro_after_init = {
1912        .hdrsize = sizeof(struct ovs_header),
1913        .name = OVS_DATAPATH_FAMILY,
1914        .version = OVS_DATAPATH_VERSION,
1915        .maxattr = OVS_DP_ATTR_MAX,
1916        .policy = datapath_policy,
1917        .netnsok = true,
1918        .parallel_ops = true,
1919        .ops = dp_datapath_genl_ops,
1920        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1921        .mcgrps = &ovs_dp_datapath_multicast_group,
1922        .n_mcgrps = 1,
1923        .module = THIS_MODULE,
1924};
1925
1926/* Called with ovs_mutex or RCU read lock. */
1927static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1928                                   struct net *net, u32 portid, u32 seq,
1929                                   u32 flags, u8 cmd, gfp_t gfp)
1930{
1931        struct ovs_header *ovs_header;
1932        struct ovs_vport_stats vport_stats;
1933        int err;
1934
1935        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1936                                 flags, cmd);
1937        if (!ovs_header)
1938                return -EMSGSIZE;
1939
1940        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1941
1942        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1943            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1944            nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1945                           ovs_vport_name(vport)) ||
1946            nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
1947                goto nla_put_failure;
1948
1949        if (!net_eq(net, dev_net(vport->dev))) {
1950                int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
1951
1952                if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
1953                        goto nla_put_failure;
1954        }
1955
1956        ovs_vport_get_stats(vport, &vport_stats);
1957        if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1958                          sizeof(struct ovs_vport_stats), &vport_stats,
1959                          OVS_VPORT_ATTR_PAD))
1960                goto nla_put_failure;
1961
1962        if (ovs_vport_get_upcall_portids(vport, skb))
1963                goto nla_put_failure;
1964
1965        err = ovs_vport_get_options(vport, skb);
1966        if (err == -EMSGSIZE)
1967                goto error;
1968
1969        genlmsg_end(skb, ovs_header);
1970        return 0;
1971
1972nla_put_failure:
1973        err = -EMSGSIZE;
1974error:
1975        genlmsg_cancel(skb, ovs_header);
1976        return err;
1977}
1978
1979static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1980{
1981        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1982}
1983
1984/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1985struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
1986                                         u32 portid, u32 seq, u8 cmd)
1987{
1988        struct sk_buff *skb;
1989        int retval;
1990
1991        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1992        if (!skb)
1993                return ERR_PTR(-ENOMEM);
1994
1995        retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
1996                                         GFP_KERNEL);
1997        BUG_ON(retval < 0);
1998
1999        return skb;
2000}
2001
2002/* Called with ovs_mutex or RCU read lock. */
2003static struct vport *lookup_vport(struct net *net,
2004                                  const struct ovs_header *ovs_header,
2005                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2006{
2007        struct datapath *dp;
2008        struct vport *vport;
2009
2010        if (a[OVS_VPORT_ATTR_IFINDEX])
2011                return ERR_PTR(-EOPNOTSUPP);
2012        if (a[OVS_VPORT_ATTR_NAME]) {
2013                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2014                if (!vport)
2015                        return ERR_PTR(-ENODEV);
2016                if (ovs_header->dp_ifindex &&
2017                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2018                        return ERR_PTR(-ENODEV);
2019                return vport;
2020        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2021                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2022
2023                if (port_no >= DP_MAX_PORTS)
2024                        return ERR_PTR(-EFBIG);
2025
2026                dp = get_dp(net, ovs_header->dp_ifindex);
2027                if (!dp)
2028                        return ERR_PTR(-ENODEV);
2029
2030                vport = ovs_vport_ovsl_rcu(dp, port_no);
2031                if (!vport)
2032                        return ERR_PTR(-ENODEV);
2033                return vport;
2034        } else
2035                return ERR_PTR(-EINVAL);
2036
2037}
2038
2039static unsigned int ovs_get_max_headroom(struct datapath *dp)
2040{
2041        unsigned int dev_headroom, max_headroom = 0;
2042        struct net_device *dev;
2043        struct vport *vport;
2044        int i;
2045
2046        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2047                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2048                                        lockdep_ovsl_is_held()) {
2049                        dev = vport->dev;
2050                        dev_headroom = netdev_get_fwd_headroom(dev);
2051                        if (dev_headroom > max_headroom)
2052                                max_headroom = dev_headroom;
2053                }
2054        }
2055
2056        return max_headroom;
2057}
2058
2059/* Called with ovs_mutex */
2060static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2061{
2062        struct vport *vport;
2063        int i;
2064
2065        dp->max_headroom = new_headroom;
2066        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
2067                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2068                                        lockdep_ovsl_is_held())
2069                        netdev_set_rx_headroom(vport->dev, new_headroom);
2070}
2071
2072static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2073{
2074        struct nlattr **a = info->attrs;
2075        struct ovs_header *ovs_header = info->userhdr;
2076        struct vport_parms parms;
2077        struct sk_buff *reply;
2078        struct vport *vport;
2079        struct datapath *dp;
2080        unsigned int new_headroom;
2081        u32 port_no;
2082        int err;
2083
2084        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2085            !a[OVS_VPORT_ATTR_UPCALL_PID])
2086                return -EINVAL;
2087        if (a[OVS_VPORT_ATTR_IFINDEX])
2088                return -EOPNOTSUPP;
2089
2090        port_no = a[OVS_VPORT_ATTR_PORT_NO]
2091                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2092        if (port_no >= DP_MAX_PORTS)
2093                return -EFBIG;
2094
2095        reply = ovs_vport_cmd_alloc_info();
2096        if (!reply)
2097                return -ENOMEM;
2098
2099        ovs_lock();
2100restart:
2101        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2102        err = -ENODEV;
2103        if (!dp)
2104                goto exit_unlock_free;
2105
2106        if (port_no) {
2107                vport = ovs_vport_ovsl(dp, port_no);
2108                err = -EBUSY;
2109                if (vport)
2110                        goto exit_unlock_free;
2111        } else {
2112                for (port_no = 1; ; port_no++) {
2113                        if (port_no >= DP_MAX_PORTS) {
2114                                err = -EFBIG;
2115                                goto exit_unlock_free;
2116                        }
2117                        vport = ovs_vport_ovsl(dp, port_no);
2118                        if (!vport)
2119                                break;
2120                }
2121        }
2122
2123        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2124        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2125        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2126        parms.dp = dp;
2127        parms.port_no = port_no;
2128        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2129
2130        vport = new_vport(&parms);
2131        err = PTR_ERR(vport);
2132        if (IS_ERR(vport)) {
2133                if (err == -EAGAIN)
2134                        goto restart;
2135                goto exit_unlock_free;
2136        }
2137
2138        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2139                                      info->snd_portid, info->snd_seq, 0,
2140                                      OVS_VPORT_CMD_NEW, GFP_KERNEL);
2141
2142        new_headroom = netdev_get_fwd_headroom(vport->dev);
2143
2144        if (new_headroom > dp->max_headroom)
2145                ovs_update_headroom(dp, new_headroom);
2146        else
2147                netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2148
2149        BUG_ON(err < 0);
2150        ovs_unlock();
2151
2152        ovs_notify(&dp_vport_genl_family, reply, info);
2153        return 0;
2154
2155exit_unlock_free:
2156        ovs_unlock();
2157        kfree_skb(reply);
2158        return err;
2159}
2160
2161static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2162{
2163        struct nlattr **a = info->attrs;
2164        struct sk_buff *reply;
2165        struct vport *vport;
2166        int err;
2167
2168        reply = ovs_vport_cmd_alloc_info();
2169        if (!reply)
2170                return -ENOMEM;
2171
2172        ovs_lock();
2173        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2174        err = PTR_ERR(vport);
2175        if (IS_ERR(vport))
2176                goto exit_unlock_free;
2177
2178        if (a[OVS_VPORT_ATTR_TYPE] &&
2179            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2180                err = -EINVAL;
2181                goto exit_unlock_free;
2182        }
2183
2184        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2185                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2186                if (err)
2187                        goto exit_unlock_free;
2188        }
2189
2190
2191        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2192                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2193
2194                err = ovs_vport_set_upcall_portids(vport, ids);
2195                if (err)
2196                        goto exit_unlock_free;
2197        }
2198
2199        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2200                                      info->snd_portid, info->snd_seq, 0,
2201                                      OVS_VPORT_CMD_SET, GFP_KERNEL);
2202        BUG_ON(err < 0);
2203
2204        ovs_unlock();
2205        ovs_notify(&dp_vport_genl_family, reply, info);
2206        return 0;
2207
2208exit_unlock_free:
2209        ovs_unlock();
2210        kfree_skb(reply);
2211        return err;
2212}
2213
2214static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2215{
2216        bool update_headroom = false;
2217        struct nlattr **a = info->attrs;
2218        struct sk_buff *reply;
2219        struct datapath *dp;
2220        struct vport *vport;
2221        unsigned int new_headroom;
2222        int err;
2223
2224        reply = ovs_vport_cmd_alloc_info();
2225        if (!reply)
2226                return -ENOMEM;
2227
2228        ovs_lock();
2229        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2230        err = PTR_ERR(vport);
2231        if (IS_ERR(vport))
2232                goto exit_unlock_free;
2233
2234        if (vport->port_no == OVSP_LOCAL) {
2235                err = -EINVAL;
2236                goto exit_unlock_free;
2237        }
2238
2239        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2240                                      info->snd_portid, info->snd_seq, 0,
2241                                      OVS_VPORT_CMD_DEL, GFP_KERNEL);
2242        BUG_ON(err < 0);
2243
2244        /* the vport deletion may trigger dp headroom update */
2245        dp = vport->dp;
2246        if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2247                update_headroom = true;
2248
2249        netdev_reset_rx_headroom(vport->dev);
2250        ovs_dp_detach_port(vport);
2251
2252        if (update_headroom) {
2253                new_headroom = ovs_get_max_headroom(dp);
2254
2255                if (new_headroom < dp->max_headroom)
2256                        ovs_update_headroom(dp, new_headroom);
2257        }
2258        ovs_unlock();
2259
2260        ovs_notify(&dp_vport_genl_family, reply, info);
2261        return 0;
2262
2263exit_unlock_free:
2264        ovs_unlock();
2265        kfree_skb(reply);
2266        return err;
2267}
2268
2269static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2270{
2271        struct nlattr **a = info->attrs;
2272        struct ovs_header *ovs_header = info->userhdr;
2273        struct sk_buff *reply;
2274        struct vport *vport;
2275        int err;
2276
2277        reply = ovs_vport_cmd_alloc_info();
2278        if (!reply)
2279                return -ENOMEM;
2280
2281        rcu_read_lock();
2282        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2283        err = PTR_ERR(vport);
2284        if (IS_ERR(vport))
2285                goto exit_unlock_free;
2286        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2287                                      info->snd_portid, info->snd_seq, 0,
2288                                      OVS_VPORT_CMD_GET, GFP_ATOMIC);
2289        BUG_ON(err < 0);
2290        rcu_read_unlock();
2291
2292        return genlmsg_reply(reply, info);
2293
2294exit_unlock_free:
2295        rcu_read_unlock();
2296        kfree_skb(reply);
2297        return err;
2298}
2299
2300static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2301{
2302        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2303        struct datapath *dp;
2304        int bucket = cb->args[0], skip = cb->args[1];
2305        int i, j = 0;
2306
2307        rcu_read_lock();
2308        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2309        if (!dp) {
2310                rcu_read_unlock();
2311                return -ENODEV;
2312        }
2313        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2314                struct vport *vport;
2315
2316                j = 0;
2317                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2318                        if (j >= skip &&
2319                            ovs_vport_cmd_fill_info(vport, skb,
2320                                                    sock_net(skb->sk),
2321                                                    NETLINK_CB(cb->skb).portid,
2322                                                    cb->nlh->nlmsg_seq,
2323                                                    NLM_F_MULTI,
2324                                                    OVS_VPORT_CMD_GET,
2325                                                    GFP_ATOMIC) < 0)
2326                                goto out;
2327
2328                        j++;
2329                }
2330                skip = 0;
2331        }
2332out:
2333        rcu_read_unlock();
2334
2335        cb->args[0] = i;
2336        cb->args[1] = j;
2337
2338        return skb->len;
2339}
2340
2341static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2342        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2343        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2344        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2345        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2346        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2347        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2348        [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2349        [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2350};
2351
2352static const struct genl_ops dp_vport_genl_ops[] = {
2353        { .cmd = OVS_VPORT_CMD_NEW,
2354          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2355          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2356          .doit = ovs_vport_cmd_new
2357        },
2358        { .cmd = OVS_VPORT_CMD_DEL,
2359          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2360          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2361          .doit = ovs_vport_cmd_del
2362        },
2363        { .cmd = OVS_VPORT_CMD_GET,
2364          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2365          .flags = 0,               /* OK for unprivileged users. */
2366          .doit = ovs_vport_cmd_get,
2367          .dumpit = ovs_vport_cmd_dump
2368        },
2369        { .cmd = OVS_VPORT_CMD_SET,
2370          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2371          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2372          .doit = ovs_vport_cmd_set,
2373        },
2374};
2375
2376struct genl_family dp_vport_genl_family __ro_after_init = {
2377        .hdrsize = sizeof(struct ovs_header),
2378        .name = OVS_VPORT_FAMILY,
2379        .version = OVS_VPORT_VERSION,
2380        .maxattr = OVS_VPORT_ATTR_MAX,
2381        .policy = vport_policy,
2382        .netnsok = true,
2383        .parallel_ops = true,
2384        .ops = dp_vport_genl_ops,
2385        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2386        .mcgrps = &ovs_dp_vport_multicast_group,
2387        .n_mcgrps = 1,
2388        .module = THIS_MODULE,
2389};
2390
2391static struct genl_family * const dp_genl_families[] = {
2392        &dp_datapath_genl_family,
2393        &dp_vport_genl_family,
2394        &dp_flow_genl_family,
2395        &dp_packet_genl_family,
2396        &dp_meter_genl_family,
2397#if     IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2398        &dp_ct_limit_genl_family,
2399#endif
2400};
2401
2402static void dp_unregister_genl(int n_families)
2403{
2404        int i;
2405
2406        for (i = 0; i < n_families; i++)
2407                genl_unregister_family(dp_genl_families[i]);
2408}
2409
2410static int __init dp_register_genl(void)
2411{
2412        int err;
2413        int i;
2414
2415        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2416
2417                err = genl_register_family(dp_genl_families[i]);
2418                if (err)
2419                        goto error;
2420        }
2421
2422        return 0;
2423
2424error:
2425        dp_unregister_genl(i);
2426        return err;
2427}
2428
2429static int __net_init ovs_init_net(struct net *net)
2430{
2431        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2432
2433        INIT_LIST_HEAD(&ovs_net->dps);
2434        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2435        return ovs_ct_init(net);
2436}
2437
2438static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2439                                            struct list_head *head)
2440{
2441        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2442        struct datapath *dp;
2443
2444        list_for_each_entry(dp, &ovs_net->dps, list_node) {
2445                int i;
2446
2447                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2448                        struct vport *vport;
2449
2450                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2451                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2452                                        continue;
2453
2454                                if (dev_net(vport->dev) == dnet)
2455                                        list_add(&vport->detach_list, head);
2456                        }
2457                }
2458        }
2459}
2460
2461static void __net_exit ovs_exit_net(struct net *dnet)
2462{
2463        struct datapath *dp, *dp_next;
2464        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2465        struct vport *vport, *vport_next;
2466        struct net *net;
2467        LIST_HEAD(head);
2468
2469        ovs_ct_exit(dnet);
2470        ovs_lock();
2471        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2472                __dp_destroy(dp);
2473
2474        down_read(&net_rwsem);
2475        for_each_net(net)
2476                list_vports_from_net(net, dnet, &head);
2477        up_read(&net_rwsem);
2478
2479        /* Detach all vports from given namespace. */
2480        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2481                list_del(&vport->detach_list);
2482                ovs_dp_detach_port(vport);
2483        }
2484
2485        ovs_unlock();
2486
2487        cancel_work_sync(&ovs_net->dp_notify_work);
2488}
2489
2490static struct pernet_operations ovs_net_ops = {
2491        .init = ovs_init_net,
2492        .exit = ovs_exit_net,
2493        .id   = &ovs_net_id,
2494        .size = sizeof(struct ovs_net),
2495};
2496
2497static int __init dp_init(void)
2498{
2499        int err;
2500
2501        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
2502
2503        pr_info("Open vSwitch switching datapath\n");
2504
2505        err = action_fifos_init();
2506        if (err)
2507                goto error;
2508
2509        err = ovs_internal_dev_rtnl_link_register();
2510        if (err)
2511                goto error_action_fifos_exit;
2512
2513        err = ovs_flow_init();
2514        if (err)
2515                goto error_unreg_rtnl_link;
2516
2517        err = ovs_vport_init();
2518        if (err)
2519                goto error_flow_exit;
2520
2521        err = register_pernet_device(&ovs_net_ops);
2522        if (err)
2523                goto error_vport_exit;
2524
2525        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2526        if (err)
2527                goto error_netns_exit;
2528
2529        err = ovs_netdev_init();
2530        if (err)
2531                goto error_unreg_notifier;
2532
2533        err = dp_register_genl();
2534        if (err < 0)
2535                goto error_unreg_netdev;
2536
2537        return 0;
2538
2539error_unreg_netdev:
2540        ovs_netdev_exit();
2541error_unreg_notifier:
2542        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2543error_netns_exit:
2544        unregister_pernet_device(&ovs_net_ops);
2545error_vport_exit:
2546        ovs_vport_exit();
2547error_flow_exit:
2548        ovs_flow_exit();
2549error_unreg_rtnl_link:
2550        ovs_internal_dev_rtnl_link_unregister();
2551error_action_fifos_exit:
2552        action_fifos_exit();
2553error:
2554        return err;
2555}
2556
2557static void dp_cleanup(void)
2558{
2559        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2560        ovs_netdev_exit();
2561        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2562        unregister_pernet_device(&ovs_net_ops);
2563        rcu_barrier();
2564        ovs_vport_exit();
2565        ovs_flow_exit();
2566        ovs_internal_dev_rtnl_link_unregister();
2567        action_fifos_exit();
2568}
2569
2570module_init(dp_init);
2571module_exit(dp_cleanup);
2572
2573MODULE_DESCRIPTION("Open vSwitch switching datapath");
2574MODULE_LICENSE("GPL");
2575MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2576MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2577MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2578MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2579MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2580MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2581