linux/net/openvswitch/datapath.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};
/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath or port,
 * set operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
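
/*
 * Illustrative usage sketch (not part of the original file): writers
 * follow the discipline documented above, while readers rely on RCU
 * and the _rcu lookup variants, e.g.:
 *
 *        ovs_lock();
 *        vport = new_vport(&parms);      // mutates dp->ports, needs ovs_lock
 *        ovs_unlock();
 *
 *        rcu_read_lock();                // read side needs only RCU
 *        vport = ovs_vport_rcu(dp, port_no);
 *        rcu_read_unlock();
 */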

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

        return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        ovs_meters_exit(dp);
        kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
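
/*
 * Worked example (illustrative): DP_VPORT_HASH_BUCKETS is a power of
 * two, so the mask above is a cheap modulo; with 1024 buckets,
 * port_no 1029 maps to 1029 & 1023 == 5, i.e. bucket 5.
 */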

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node,
                                 lockdep_ovsl_is_held()) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;
        u32 n_cache_hit;
        int error;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                                         &n_mask_hit, &n_cache_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        error = ovs_execute_actions(dp, skb, sf_acts, key);
        if (unlikely(error))
                net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
                                    ovs_dp_name(dp), error);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        stats->n_cache_hit += n_cache_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
        struct dp_stats_percpu *stats;
        int err;

        if (trace_ovs_dp_upcall_enabled())
                trace_ovs_dp_upcall(dp, skb, key, upcall_info);

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info,
                             uint32_t cutlen)
{
        unsigned int gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb_list_walk_safe(segs, skb, nskb) {
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;
        }

        /* Free all of the segments. */
        skb_list_walk_safe(segs, skb, nskb) {
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        }
        return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen, int actions_attrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
                + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
                + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(actions_attrlen);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        skb_put_zero(skb, plen);
        }
}
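
/*
 * Worked example (illustrative): for a 13-byte message, NLA_ALIGN(13)
 * is 16, so pad_packet() appends 3 zero bytes; if userspace negotiated
 * OVS_DP_F_UNALIGNED, no padding is added at all.
 */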

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;
        u64 hash;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_csum_hwoffload_help(skb, 0)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        if (err)
                goto out;

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start_noflag(user_skb,
                                            OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                if (err)
                        goto out;

                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru &&
            nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0 &&
            nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_HASH */
        hash = skb_get_hash_raw(skb);
        if (skb->sw_hash)
                hash |= OVS_PACKET_HASH_SW_BIT;

        if (skb->l4_hash)
                hash |= OVS_PACKET_HASH_L4_BIT;

        if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}
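
/*
 * For reference (derived from the code above), a queued upcall message
 * has this layout; bracketed attributes are optional:
 *
 *        struct ovs_header               (dp_ifindex)
 *        OVS_PACKET_ATTR_KEY
 *        [OVS_PACKET_ATTR_USERDATA]
 *        [OVS_PACKET_ATTR_EGRESS_TUN_KEY]
 *        [OVS_PACKET_ATTR_ACTIONS]
 *        [OVS_PACKET_ATTR_MRU]
 *        [OVS_PACKET_ATTR_LEN]           (only when the packet is truncated)
 *        OVS_PACKET_ATTR_HASH
 *        OVS_PACKET_ATTR_PACKET          (payload added by skb_zerocopy())
 */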

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        u64 hash;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        if (a[OVS_PACKET_ATTR_HASH]) {
                hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

                __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
                               !!(hash & OVS_PACKET_HASH_SW_BIT),
                               !!(hash & OVS_PACKET_HASH_L4_BIT));
        }

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
        [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static const struct genl_small_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .policy = packet_policy,
        .netnsok = true,
        .parallel_ops = true,
        .small_ops = dp_packet_genl_ops,
        .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
                mega_stats->n_cache_hit += local_stats.n_cache_hit;
        }
}
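
/*
 * Note (illustrative): the fetch/retry loop above is the standard
 * u64_stats reader pattern. On 32-bit machines a u64 counter cannot be
 * loaded atomically, so the reader snapshots the per-CPU struct and
 * retries if a writer updated it concurrently:
 *
 *        do {
 *                start = u64_stats_fetch_begin_irq(&syncp);
 *                snapshot = *stats;
 *        } while (u64_stats_fetch_retry_irq(&syncp, start));
 */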

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback;
         * see ovs_nla_put_identifier()
         */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);
        else
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                              OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
                          sizeof(struct ovs_flow_stats), &stats,
                          OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        if (WARN_ON_ONCE(retval < 0)) {
                kfree_skb(skb);
                skb = ERR_PTR(retval);
        }
        return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &new_flow->key, false, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &new_flow->key, log);
        if (error)
                goto err_kfree_flow;

        /* unmasked key is needed to match when ufid is not used. */
        if (ovs_identifier_is_key(&new_flow->id))
                match.key = new_flow->id.unmasked_key;

        ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
                                         const struct nlattr *a,
                                         const struct sw_flow_key *key,
                                         const struct sw_flow_mask *mask,
                                         bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}
/* Factor out match-init and action-copy to avoid a
 * "Wframe-larger-than=1024" warning. Because the mask is only
 * used to get the actions, we use a separate function to save
 * some stack space.
 *
 * If neither the key nor the action attrs are present, we return 0
 * directly; in that case the caller will not use the match either.
 * If the action attr is present, we try to get the actions and save
 * them to *acts. Before returning from the function, we reset the
 * match->mask pointer because we must not return a match object with
 * a dangling reference to the on-stack mask.
 */
1122static noinline_for_stack int
1123ovs_nla_init_match_and_action(struct net *net,
1124                              struct sw_flow_match *match,
1125                              struct sw_flow_key *key,
1126                              struct nlattr **a,
1127                              struct sw_flow_actions **acts,
1128                              bool log)
1129{
1130        struct sw_flow_mask mask;
1131        int error = 0;
1132
1133        if (a[OVS_FLOW_ATTR_KEY]) {
1134                ovs_match_init(match, key, true, &mask);
1135                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1136                                          a[OVS_FLOW_ATTR_MASK], log);
1137                if (error)
1138                        goto error;
1139        }
1140
1141        if (a[OVS_FLOW_ATTR_ACTIONS]) {
1142                if (!a[OVS_FLOW_ATTR_KEY]) {
1143                        OVS_NLERR(log,
1144                                  "Flow key attribute not present in set flow.");
1145                        error = -EINVAL;
1146                        goto error;
1147                }
1148
1149                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1150                                         &mask, log);
1151                if (IS_ERR(*acts)) {
1152                        error = PTR_ERR(*acts);
1153                        goto error;
1154                }
1155        }
1156
1157        /* On success, error is 0. */
1158error:
1159        match->mask = NULL;
1160        return error;
1161}
1162
1163static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1164{
1165        struct net *net = sock_net(skb->sk);
1166        struct nlattr **a = info->attrs;
1167        struct ovs_header *ovs_header = info->userhdr;
1168        struct sw_flow_key key;
1169        struct sw_flow *flow;
1170        struct sk_buff *reply = NULL;
1171        struct datapath *dp;
1172        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1173        struct sw_flow_match match;
1174        struct sw_flow_id sfid;
1175        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1176        int error = 0;
1177        bool log = !a[OVS_FLOW_ATTR_PROBE];
1178        bool ufid_present;
1179
1180        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1181        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1182                OVS_NLERR(log,
1183                          "Flow set message rejected, Key attribute missing.");
1184                return -EINVAL;
1185        }
1186
1187        error = ovs_nla_init_match_and_action(net, &match, &key, a,
1188                                              &acts, log);
1189        if (error)
1190                goto error;
1191
1192        if (acts) {
1193                /* Can allocate before locking if have acts. */
1194                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1195                                                ufid_flags);
1196                if (IS_ERR(reply)) {
1197                        error = PTR_ERR(reply);
1198                        goto err_kfree_acts;
1199                }
1200        }
1201
1202        ovs_lock();
1203        dp = get_dp(net, ovs_header->dp_ifindex);
1204        if (unlikely(!dp)) {
1205                error = -ENODEV;
1206                goto err_unlock_ovs;
1207        }
1208        /* Check that the flow exists. */
1209        if (ufid_present)
1210                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1211        else
1212                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1213        if (unlikely(!flow)) {
1214                error = -ENOENT;
1215                goto err_unlock_ovs;
1216        }
1217
1218        /* Update actions, if present. */
1219        if (likely(acts)) {
1220                old_acts = ovsl_dereference(flow->sf_acts);
1221                rcu_assign_pointer(flow->sf_acts, acts);
1222
1223                if (unlikely(reply)) {
1224                        error = ovs_flow_cmd_fill_info(flow,
1225                                                       ovs_header->dp_ifindex,
1226                                                       reply, info->snd_portid,
1227                                                       info->snd_seq, 0,
1228                                                       OVS_FLOW_CMD_SET,
1229                                                       ufid_flags);
1230                        BUG_ON(error < 0);
1231                }
1232        } else {
1233                /* Could not alloc without acts before locking. */
1234                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1235                                                info, OVS_FLOW_CMD_SET, false,
1236                                                ufid_flags);
1237
1238                if (IS_ERR(reply)) {
1239                        error = PTR_ERR(reply);
1240                        goto err_unlock_ovs;
1241                }
1242        }
1243
1244        /* Clear stats. */
1245        if (a[OVS_FLOW_ATTR_CLEAR])
1246                ovs_flow_stats_clear(flow);
1247        ovs_unlock();
1248
1249        if (reply)
1250                ovs_notify(&dp_flow_genl_family, reply, info);
1251        if (old_acts)
1252                ovs_nla_free_flow_actions_rcu(old_acts);
1253
1254        return 0;
1255
1256err_unlock_ovs:
1257        ovs_unlock();
1258        kfree_skb(reply);
1259err_kfree_acts:
1260        ovs_nla_free_flow_actions(acts);
1261error:
1262        return error;
1263}
1264
1265static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1266{
1267        struct nlattr **a = info->attrs;
1268        struct ovs_header *ovs_header = info->userhdr;
1269        struct net *net = sock_net(skb->sk);
1270        struct sw_flow_key key;
1271        struct sk_buff *reply;
1272        struct sw_flow *flow;
1273        struct datapath *dp;
1274        struct sw_flow_match match;
1275        struct sw_flow_id ufid;
1276        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1277        int err = 0;
1278        bool log = !a[OVS_FLOW_ATTR_PROBE];
1279        bool ufid_present;
1280
1281        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1282        if (a[OVS_FLOW_ATTR_KEY]) {
1283                ovs_match_init(&match, &key, true, NULL);
1284                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1285                                        log);
1286        } else if (!ufid_present) {
1287                OVS_NLERR(log,
1288                          "Flow get message rejected, Key attribute missing.");
1289                err = -EINVAL;
1290        }
1291        if (err)
1292                return err;
1293
1294        ovs_lock();
1295        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1296        if (!dp) {
1297                err = -ENODEV;
1298                goto unlock;
1299        }
1300
1301        if (ufid_present)
1302                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1303        else
1304                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1305        if (!flow) {
1306                err = -ENOENT;
1307                goto unlock;
1308        }
1309
1310        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1311                                        OVS_FLOW_CMD_GET, true, ufid_flags);
1312        if (IS_ERR(reply)) {
1313                err = PTR_ERR(reply);
1314                goto unlock;
1315        }
1316
1317        ovs_unlock();
1318        return genlmsg_reply(reply, info);
1319unlock:
1320        ovs_unlock();
1321        return err;
1322}
1323
1324static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1325{
1326        struct nlattr **a = info->attrs;
1327        struct ovs_header *ovs_header = info->userhdr;
1328        struct net *net = sock_net(skb->sk);
1329        struct sw_flow_key key;
1330        struct sk_buff *reply;
1331        struct sw_flow *flow = NULL;
1332        struct datapath *dp;
1333        struct sw_flow_match match;
1334        struct sw_flow_id ufid;
1335        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1336        int err;
1337        bool log = !a[OVS_FLOW_ATTR_PROBE];
1338        bool ufid_present;
1339
1340        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1341        if (a[OVS_FLOW_ATTR_KEY]) {
1342                ovs_match_init(&match, &key, true, NULL);
1343                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1344                                        NULL, log);
1345                if (unlikely(err))
1346                        return err;
1347        }
1348
1349        ovs_lock();
1350        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1351        if (unlikely(!dp)) {
1352                err = -ENODEV;
1353                goto unlock;
1354        }
1355
1356        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1357                err = ovs_flow_tbl_flush(&dp->table);
1358                goto unlock;
1359        }
1360
1361        if (ufid_present)
1362                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1363        else
1364                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1365        if (unlikely(!flow)) {
1366                err = -ENOENT;
1367                goto unlock;
1368        }
1369
1370        ovs_flow_tbl_remove(&dp->table, flow);
1371        ovs_unlock();
1372
1373        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1374                                        &flow->id, info, false, ufid_flags);
1375        if (likely(reply)) {
1376                if (!IS_ERR(reply)) {
1377                        rcu_read_lock();        /*To keep RCU checker happy. */
1378                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1379                                                     reply, info->snd_portid,
1380                                                     info->snd_seq, 0,
1381                                                     OVS_FLOW_CMD_DEL,
1382                                                     ufid_flags);
1383                        rcu_read_unlock();
1384                        if (WARN_ON_ONCE(err < 0)) {
1385                                kfree_skb(reply);
1386                                goto out_free;
1387                        }
1388
1389                        ovs_notify(&dp_flow_genl_family, reply, info);
1390                } else {
1391                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
1392                                        PTR_ERR(reply));
1393                }
1394        }
1395
1396out_free:
1397        ovs_flow_free(flow, true);
1398        return 0;
1399unlock:
1400        ovs_unlock();
1401        return err;
1402}
1403
1404static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1405{
1406        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1407        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1408        struct table_instance *ti;
1409        struct datapath *dp;
1410        u32 ufid_flags;
1411        int err;
1412
1413        err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1414                                       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1415        if (err)
1416                return err;
1417        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1418
1419        rcu_read_lock();
1420        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1421        if (!dp) {
1422                rcu_read_unlock();
1423                return -ENODEV;
1424        }
1425
1426        ti = rcu_dereference(dp->table.ti);
1427        for (;;) {
1428                struct sw_flow *flow;
1429                u32 bucket, obj;
1430
1431                bucket = cb->args[0];
1432                obj = cb->args[1];
1433                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1434                if (!flow)
1435                        break;
1436
1437                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1438                                           NETLINK_CB(cb->skb).portid,
1439                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
1440                                           OVS_FLOW_CMD_GET, ufid_flags) < 0)
1441                        break;
1442
1443                cb->args[0] = bucket;
1444                cb->args[1] = obj;
1445        }
1446        rcu_read_unlock();
1447        return skb->len;
1448}
1449
1450static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1451        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1452        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1453        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1454        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1455        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1456        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1457        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1458};
1459
1460static const struct genl_small_ops dp_flow_genl_ops[] = {
1461        { .cmd = OVS_FLOW_CMD_NEW,
1462          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1463          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1464          .doit = ovs_flow_cmd_new
1465        },
1466        { .cmd = OVS_FLOW_CMD_DEL,
1467          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1468          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1469          .doit = ovs_flow_cmd_del
1470        },
1471        { .cmd = OVS_FLOW_CMD_GET,
1472          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1473          .flags = 0,               /* OK for unprivileged users. */
1474          .doit = ovs_flow_cmd_get,
1475          .dumpit = ovs_flow_cmd_dump
1476        },
1477        { .cmd = OVS_FLOW_CMD_SET,
1478          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1479          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1480          .doit = ovs_flow_cmd_set,
1481        },
1482};
1483
1484static struct genl_family dp_flow_genl_family __ro_after_init = {
1485        .hdrsize = sizeof(struct ovs_header),
1486        .name = OVS_FLOW_FAMILY,
1487        .version = OVS_FLOW_VERSION,
1488        .maxattr = OVS_FLOW_ATTR_MAX,
1489        .policy = flow_policy,
1490        .netnsok = true,
1491        .parallel_ops = true,
1492        .small_ops = dp_flow_genl_ops,
1493        .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
1494        .mcgrps = &ovs_dp_flow_multicast_group,
1495        .n_mcgrps = 1,
1496        .module = THIS_MODULE,
1497};
1498
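    /* Upper bound on the size of a datapath reply. Every attribute emitted
     * by ovs_dp_cmd_fill_info() must be accounted for here, otherwise the
     * skb allocated by ovs_dp_cmd_alloc_info() may be too small and filling
     * it would fail with -EMSGSIZE.
     */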
1499static size_t ovs_dp_cmd_msg_size(void)
1500{
1501        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1502
1503        msgsize += nla_total_size(IFNAMSIZ);
1504        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1505        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1506        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1507        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1508
1509        return msgsize;
1510}
1511
1512/* Called with ovs_mutex. */
1513static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1514                                u32 portid, u32 seq, u32 flags, u8 cmd)
1515{
1516        struct ovs_header *ovs_header;
1517        struct ovs_dp_stats dp_stats;
1518        struct ovs_dp_megaflow_stats dp_megaflow_stats;
1519        int err;
1520
1521        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1522                                 flags, cmd);
1523        if (!ovs_header)
1524                goto error;
1525
1526        ovs_header->dp_ifindex = get_dpifindex(dp);
1527
1528        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1529        if (err)
1530                goto nla_put_failure;
1531
1532        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1533        if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1534                          &dp_stats, OVS_DP_ATTR_PAD))
1535                goto nla_put_failure;
1536
1537        if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1538                          sizeof(struct ovs_dp_megaflow_stats),
1539                          &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1540                goto nla_put_failure;
1541
1542        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1543                goto nla_put_failure;
1544
1545        if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1546                        ovs_flow_tbl_masks_cache_size(&dp->table)))
1547                goto nla_put_failure;
1548
1549        genlmsg_end(skb, ovs_header);
1550        return 0;
1551
1552nla_put_failure:
1553        genlmsg_cancel(skb, ovs_header);
1554error:
1555        return -EMSGSIZE;
1556}
1557
1558static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1559{
1560        return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1561}
1562
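    /* A datapath is addressed either by the ifindex of its local port
     * (ovs_header->dp_ifindex) or by name. The datapath shares its name
     * with its OVSP_LOCAL internal vport, hence the port_no check below.
     */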
1563/* Called with rcu_read_lock or ovs_mutex. */
1564static struct datapath *lookup_datapath(struct net *net,
1565                                        const struct ovs_header *ovs_header,
1566                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1567{
1568        struct datapath *dp;
1569
1570        if (!a[OVS_DP_ATTR_NAME])
1571                dp = get_dp(net, ovs_header->dp_ifindex);
1572        else {
1573                struct vport *vport;
1574
1575                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1576                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1577        }
1578        return dp ? dp : ERR_PTR(-ENODEV);
1579}
1580
1581static void ovs_dp_reset_user_features(struct sk_buff *skb,
1582                                       struct genl_info *info)
1583{
1584        struct datapath *dp;
1585
1586        dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
1587                             info->attrs);
1588        if (IS_ERR(dp))
1589                return;
1590
1591        WARN(dp->user_features, "Dropping previously announced user features\n");
1592        dp->user_features = 0;
1593}
1594
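    /* Flipped in ovs_dp_change() below. Using a static key lets the packet
     * fast path test for TC recirc-id sharing with a patched branch rather
     * than a memory load when the feature is disabled.
     */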
1595DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
1596
1597static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1598{
1599        u32 user_features = 0;
1600
1601        if (a[OVS_DP_ATTR_USER_FEATURES]) {
1602                user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1603
1604                if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1605                                      OVS_DP_F_UNALIGNED |
1606                                      OVS_DP_F_TC_RECIRC_SHARING))
1607                        return -EOPNOTSUPP;
1608
1609#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1610                if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1611                        return -EOPNOTSUPP;
1612#endif
1613        }
1614
1615        if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1616                int err;
1617                u32 cache_size;
1618
1619                cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1620                err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1621                if (err)
1622                        return err;
1623        }
1624
1625        dp->user_features = user_features;
1626
1627        if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1628                static_branch_enable(&tc_recirc_sharing_support);
1629        else
1630                static_branch_disable(&tc_recirc_sharing_support);
1631
1632        return 0;
1633}
1634
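    /* A minimal sketch of how userspace might toggle the features handled
     * by ovs_dp_change() above via OVS_DP_CMD_SET, assuming libnl-style
     * helpers ("msg" is hypothetical):
     *
     *     nla_put_u32(msg, OVS_DP_ATTR_USER_FEATURES,
     *                 OVS_DP_F_VPORT_PIDS | OVS_DP_F_UNALIGNED);
     *     nla_put_u32(msg, OVS_DP_ATTR_MASKS_CACHE_SIZE, 512);
     *
     * Unknown feature bits are rejected with -EOPNOTSUPP rather than
     * silently ignored.
     */
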
1635static int ovs_dp_stats_init(struct datapath *dp)
1636{
1637        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1638        if (!dp->stats_percpu)
1639                return -ENOMEM;
1640
1641        return 0;
1642}
1643
1644static int ovs_dp_vport_init(struct datapath *dp)
1645{
1646        int i;
1647
1648        dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1649                                  sizeof(struct hlist_head),
1650                                  GFP_KERNEL);
1651        if (!dp->ports)
1652                return -ENOMEM;
1653
1654        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1655                INIT_HLIST_HEAD(&dp->ports[i]);
1656
1657        return 0;
1658}
1659
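    /* Create a datapath along with its OVSP_LOCAL internal vport. The reply
     * skb is allocated before any state is touched so that, once the new
     * datapath becomes visible, building the notification can no longer
     * fail (hence the BUG_ON after ovs_dp_cmd_fill_info()).
     */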
1660static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1661{
1662        struct nlattr **a = info->attrs;
1663        struct vport_parms parms;
1664        struct sk_buff *reply;
1665        struct datapath *dp;
1666        struct vport *vport;
1667        struct ovs_net *ovs_net;
1668        int err;
1669
1670        err = -EINVAL;
1671        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1672                goto err;
1673
1674        reply = ovs_dp_cmd_alloc_info();
1675        if (!reply)
1676                return -ENOMEM;
1677
1678        err = -ENOMEM;
1679        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1680        if (dp == NULL)
1681                goto err_destroy_reply;
1682
1683        ovs_dp_set_net(dp, sock_net(skb->sk));
1684
1685        /* Allocate table. */
1686        err = ovs_flow_tbl_init(&dp->table);
1687        if (err)
1688                goto err_destroy_dp;
1689
1690        err = ovs_dp_stats_init(dp);
1691        if (err)
1692                goto err_destroy_table;
1693
1694        err = ovs_dp_vport_init(dp);
1695        if (err)
1696                goto err_destroy_stats;
1697
1698        err = ovs_meters_init(dp);
1699        if (err)
1700                goto err_destroy_ports;
1701
1702        /* Set up our datapath device. */
1703        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1704        parms.type = OVS_VPORT_TYPE_INTERNAL;
1705        parms.options = NULL;
1706        parms.dp = dp;
1707        parms.port_no = OVSP_LOCAL;
1708        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1709
1710        /* So far only local changes have been made; now we need the lock. */
1711        ovs_lock();
1712
1713        err = ovs_dp_change(dp, a);
1714        if (err)
1715                goto err_unlock_and_destroy_meters;
1716
1717        vport = new_vport(&parms);
1718        if (IS_ERR(vport)) {
1719                err = PTR_ERR(vport);
1720                if (err == -EBUSY)
1721                        err = -EEXIST;
1722
1723                if (err == -EEXIST) {
1724                        /* An outdated user space instance that does not understand
1725                         * the concept of user_features has attempted to create a new
1726                         * datapath and is likely to reuse it. Drop all user features.
1727                         */
1728                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1729                                ovs_dp_reset_user_features(skb, info);
1730                }
1731
1732                goto err_unlock_and_destroy_meters;
1733        }
1734
1735        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1736                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1737        BUG_ON(err < 0);
1738
1739        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1740        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1741
1742        ovs_unlock();
1743
1744        ovs_notify(&dp_datapath_genl_family, reply, info);
1745        return 0;
1746
1747err_unlock_and_destroy_meters:
1748        ovs_unlock();
1749        ovs_meters_exit(dp);
1750err_destroy_ports:
1751        kfree(dp->ports);
1752err_destroy_stats:
1753        free_percpu(dp->stats_percpu);
1754err_destroy_table:
1755        ovs_flow_tbl_destroy(&dp->table);
1756err_destroy_dp:
1757        kfree(dp);
1758err_destroy_reply:
1759        kfree_skb(reply);
1760err:
1761        return err;
1762}
1763
1764/* Called with ovs_mutex. */
1765static void __dp_destroy(struct datapath *dp)
1766{
1767        struct flow_table *table = &dp->table;
1768        int i;
1769
1770        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1771                struct vport *vport;
1772                struct hlist_node *n;
1773
1774                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1775                        if (vport->port_no != OVSP_LOCAL)
1776                                ovs_dp_detach_port(vport);
1777        }
1778
1779        list_del_rcu(&dp->list_node);
1780
1781        /* OVSP_LOCAL is the datapath's internal port. Make sure that all
1782         * other ports in the datapath are destroyed before the datapath
1783         * itself is freed. */
1784        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1785
1786        /* Flush the sw_flows from the tables here so that the RCU callback
1787         * only has to release resources such as the dp, ports and tables.
1788         * This avoids issues such as RCU usage warnings.
1789         */
1790        table_instance_flow_flush(table, ovsl_dereference(table->ti),
1791                                  ovsl_dereference(table->ufid_ti));
1792
1793        /* RCU destroy the ports, meters and flow tables. */
1794        call_rcu(&dp->rcu, destroy_dp_rcu);
1795}
1796
1797static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1798{
1799        struct sk_buff *reply;
1800        struct datapath *dp;
1801        int err;
1802
1803        reply = ovs_dp_cmd_alloc_info();
1804        if (!reply)
1805                return -ENOMEM;
1806
1807        ovs_lock();
1808        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1809        err = PTR_ERR(dp);
1810        if (IS_ERR(dp))
1811                goto err_unlock_free;
1812
1813        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1814                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
1815        BUG_ON(err < 0);
1816
1817        __dp_destroy(dp);
1818        ovs_unlock();
1819
1820        ovs_notify(&dp_datapath_genl_family, reply, info);
1821
1822        return 0;
1823
1824err_unlock_free:
1825        ovs_unlock();
1826        kfree_skb(reply);
1827        return err;
1828}
1829
1830static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1831{
1832        struct sk_buff *reply;
1833        struct datapath *dp;
1834        int err;
1835
1836        reply = ovs_dp_cmd_alloc_info();
1837        if (!reply)
1838                return -ENOMEM;
1839
1840        ovs_lock();
1841        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1842        err = PTR_ERR(dp);
1843        if (IS_ERR(dp))
1844                goto err_unlock_free;
1845
1846        err = ovs_dp_change(dp, info->attrs);
1847        if (err)
1848                goto err_unlock_free;
1849
1850        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1851                                   info->snd_seq, 0, OVS_DP_CMD_SET);
1852        BUG_ON(err < 0);
1853
1854        ovs_unlock();
1855        ovs_notify(&dp_datapath_genl_family, reply, info);
1856
1857        return 0;
1858
1859err_unlock_free:
1860        ovs_unlock();
1861        kfree_skb(reply);
1862        return err;
1863}
1864
1865static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1866{
1867        struct sk_buff *reply;
1868        struct datapath *dp;
1869        int err;
1870
1871        reply = ovs_dp_cmd_alloc_info();
1872        if (!reply)
1873                return -ENOMEM;
1874
1875        ovs_lock();
1876        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1877        if (IS_ERR(dp)) {
1878                err = PTR_ERR(dp);
1879                goto err_unlock_free;
1880        }
1881        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1882                                   info->snd_seq, 0, OVS_DP_CMD_GET);
1883        BUG_ON(err < 0);
1884        ovs_unlock();
1885
1886        return genlmsg_reply(reply, info);
1887
1888err_unlock_free:
1889        ovs_unlock();
1890        kfree_skb(reply);
1891        return err;
1892}
1893
1894static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1895{
1896        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1897        struct datapath *dp;
1898        int skip = cb->args[0];
1899        int i = 0;
1900
1901        ovs_lock();
1902        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1903                if (i >= skip &&
1904                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1905                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1906                                         OVS_DP_CMD_GET) < 0)
1907                        break;
1908                i++;
1909        }
1910        ovs_unlock();
1911
1912        cb->args[0] = i;
1913
1914        return skb->len;
1915}
1916
1917static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1918        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1919        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1920        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1921        [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
1922                PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
1923};
1924
1925static const struct genl_small_ops dp_datapath_genl_ops[] = {
1926        { .cmd = OVS_DP_CMD_NEW,
1927          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1928          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1929          .doit = ovs_dp_cmd_new
1930        },
1931        { .cmd = OVS_DP_CMD_DEL,
1932          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1933          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1934          .doit = ovs_dp_cmd_del
1935        },
1936        { .cmd = OVS_DP_CMD_GET,
1937          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1938          .flags = 0,               /* OK for unprivileged users. */
1939          .doit = ovs_dp_cmd_get,
1940          .dumpit = ovs_dp_cmd_dump
1941        },
1942        { .cmd = OVS_DP_CMD_SET,
1943          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1944          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1945          .doit = ovs_dp_cmd_set,
1946        },
1947};
1948
1949static struct genl_family dp_datapath_genl_family __ro_after_init = {
1950        .hdrsize = sizeof(struct ovs_header),
1951        .name = OVS_DATAPATH_FAMILY,
1952        .version = OVS_DATAPATH_VERSION,
1953        .maxattr = OVS_DP_ATTR_MAX,
1954        .policy = datapath_policy,
1955        .netnsok = true,
1956        .parallel_ops = true,
1957        .small_ops = dp_datapath_genl_ops,
1958        .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1959        .mcgrps = &ovs_dp_datapath_multicast_group,
1960        .n_mcgrps = 1,
1961        .module = THIS_MODULE,
1962};
1963
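    /* The gfp argument below exists because peernet2id_alloc() may need to
     * allocate a new netns id: callers holding only rcu_read_lock (the GET
     * and dump paths) pass GFP_ATOMIC, while ovs_mutex holders may sleep
     * and pass GFP_KERNEL.
     */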
1964/* Called with ovs_mutex or RCU read lock. */
1965static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1966                                   struct net *net, u32 portid, u32 seq,
1967                                   u32 flags, u8 cmd, gfp_t gfp)
1968{
1969        struct ovs_header *ovs_header;
1970        struct ovs_vport_stats vport_stats;
1971        int err;
1972
1973        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1974                                 flags, cmd);
1975        if (!ovs_header)
1976                return -EMSGSIZE;
1977
1978        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1979
1980        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1981            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1982            nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1983                           ovs_vport_name(vport)) ||
1984            nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
1985                goto nla_put_failure;
1986
1987        if (!net_eq(net, dev_net(vport->dev))) {
1988                int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
1989
1990                if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
1991                        goto nla_put_failure;
1992        }
1993
1994        ovs_vport_get_stats(vport, &vport_stats);
1995        if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1996                          sizeof(struct ovs_vport_stats), &vport_stats,
1997                          OVS_VPORT_ATTR_PAD))
1998                goto nla_put_failure;
1999
2000        if (ovs_vport_get_upcall_portids(vport, skb))
2001                goto nla_put_failure;
2002
2003        err = ovs_vport_get_options(vport, skb);
2004        if (err == -EMSGSIZE)
2005                goto error;
2006
2007        genlmsg_end(skb, ovs_header);
2008        return 0;
2009
2010nla_put_failure:
2011        err = -EMSGSIZE;
2012error:
2013        genlmsg_cancel(skb, ovs_header);
2014        return err;
2015}
2016
2017static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2018{
2019        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2020}
2021
2022/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2023struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2024                                         u32 portid, u32 seq, u8 cmd)
2025{
2026        struct sk_buff *skb;
2027        int retval;
2028
2029        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2030        if (!skb)
2031                return ERR_PTR(-ENOMEM);
2032
2033        retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2034                                         GFP_KERNEL);
2035        BUG_ON(retval < 0);
2036
2037        return skb;
2038}
2039
2040/* Called with ovs_mutex or RCU read lock. */
2041static struct vport *lookup_vport(struct net *net,
2042                                  const struct ovs_header *ovs_header,
2043                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2044{
2045        struct datapath *dp;
2046        struct vport *vport;
2047
2048        if (a[OVS_VPORT_ATTR_IFINDEX])
2049                return ERR_PTR(-EOPNOTSUPP);
2050        if (a[OVS_VPORT_ATTR_NAME]) {
2051                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2052                if (!vport)
2053                        return ERR_PTR(-ENODEV);
2054                if (ovs_header->dp_ifindex &&
2055                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2056                        return ERR_PTR(-ENODEV);
2057                return vport;
2058        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2059                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2060
2061                if (port_no >= DP_MAX_PORTS)
2062                        return ERR_PTR(-EFBIG);
2063
2064                dp = get_dp(net, ovs_header->dp_ifindex);
2065                if (!dp)
2066                        return ERR_PTR(-ENODEV);
2067
2068                vport = ovs_vport_ovsl_rcu(dp, port_no);
2069                if (!vport)
2070                        return ERR_PTR(-ENODEV);
2071                return vport;
2072        } else
2073                return ERR_PTR(-EINVAL);
2075}
2076
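    /* The datapath tracks the largest forwarding headroom required by any
     * of its ports (dp->max_headroom) and propagates it to all of them, so
     * that packets received on one port can usually be transmitted out
     * another (e.g. with tunnel encapsulation) without reallocating skbs.
     */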
2077static unsigned int ovs_get_max_headroom(struct datapath *dp)
2078{
2079        unsigned int dev_headroom, max_headroom = 0;
2080        struct net_device *dev;
2081        struct vport *vport;
2082        int i;
2083
2084        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2085                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2086                                         lockdep_ovsl_is_held()) {
2087                        dev = vport->dev;
2088                        dev_headroom = netdev_get_fwd_headroom(dev);
2089                        if (dev_headroom > max_headroom)
2090                                max_headroom = dev_headroom;
2091                }
2092        }
2093
2094        return max_headroom;
2095}
2096
2097/* Called with ovs_mutex. */
2098static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2099{
2100        struct vport *vport;
2101        int i;
2102
2103        dp->max_headroom = new_headroom;
2104        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2105                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2106                                         lockdep_ovsl_is_held())
2107                        netdev_set_rx_headroom(vport->dev, new_headroom);
2108        }
2109}
2110
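    /* Adding a port can race with a vport module that is not loaded yet:
     * new_vport() is expected to return -EAGAIN after requesting the
     * module, and the "restart" label below retries once the vport ops are
     * available. A rough sketch of the mandatory request attributes, using
     * libnl-style helpers ("msg" and "pid" are hypothetical):
     *
     *     nla_put_string(msg, OVS_VPORT_ATTR_NAME, "eth0");
     *     nla_put_u32(msg, OVS_VPORT_ATTR_TYPE, OVS_VPORT_TYPE_NETDEV);
     *     nla_put_u32(msg, OVS_VPORT_ATTR_UPCALL_PID, pid);
     */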
2111static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2112{
2113        struct nlattr **a = info->attrs;
2114        struct ovs_header *ovs_header = info->userhdr;
2115        struct vport_parms parms;
2116        struct sk_buff *reply;
2117        struct vport *vport;
2118        struct datapath *dp;
2119        unsigned int new_headroom;
2120        u32 port_no;
2121        int err;
2122
2123        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2124            !a[OVS_VPORT_ATTR_UPCALL_PID])
2125                return -EINVAL;
2126        if (a[OVS_VPORT_ATTR_IFINDEX])
2127                return -EOPNOTSUPP;
2128
2129        port_no = a[OVS_VPORT_ATTR_PORT_NO]
2130                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2131        if (port_no >= DP_MAX_PORTS)
2132                return -EFBIG;
2133
2134        reply = ovs_vport_cmd_alloc_info();
2135        if (!reply)
2136                return -ENOMEM;
2137
2138        ovs_lock();
2139restart:
2140        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2141        err = -ENODEV;
2142        if (!dp)
2143                goto exit_unlock_free;
2144
2145        if (port_no) {
2146                vport = ovs_vport_ovsl(dp, port_no);
2147                err = -EBUSY;
2148                if (vport)
2149                        goto exit_unlock_free;
2150        } else {
2151                for (port_no = 1; ; port_no++) {
2152                        if (port_no >= DP_MAX_PORTS) {
2153                                err = -EFBIG;
2154                                goto exit_unlock_free;
2155                        }
2156                        vport = ovs_vport_ovsl(dp, port_no);
2157                        if (!vport)
2158                                break;
2159                }
2160        }
2161
2162        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2163        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2164        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2165        parms.dp = dp;
2166        parms.port_no = port_no;
2167        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2168
2169        vport = new_vport(&parms);
2170        err = PTR_ERR(vport);
2171        if (IS_ERR(vport)) {
2172                if (err == -EAGAIN)
2173                        goto restart;
2174                goto exit_unlock_free;
2175        }
2176
2177        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2178                                      info->snd_portid, info->snd_seq, 0,
2179                                      OVS_VPORT_CMD_NEW, GFP_KERNEL);
2180
2181        new_headroom = netdev_get_fwd_headroom(vport->dev);
2182
2183        if (new_headroom > dp->max_headroom)
2184                ovs_update_headroom(dp, new_headroom);
2185        else
2186                netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2187
2188        BUG_ON(err < 0);
2189        ovs_unlock();
2190
2191        ovs_notify(&dp_vport_genl_family, reply, info);
2192        return 0;
2193
2194exit_unlock_free:
2195        ovs_unlock();
2196        kfree_skb(reply);
2197        return err;
2198}
2199
2200static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2201{
2202        struct nlattr **a = info->attrs;
2203        struct sk_buff *reply;
2204        struct vport *vport;
2205        int err;
2206
2207        reply = ovs_vport_cmd_alloc_info();
2208        if (!reply)
2209                return -ENOMEM;
2210
2211        ovs_lock();
2212        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2213        err = PTR_ERR(vport);
2214        if (IS_ERR(vport))
2215                goto exit_unlock_free;
2216
2217        if (a[OVS_VPORT_ATTR_TYPE] &&
2218            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2219                err = -EINVAL;
2220                goto exit_unlock_free;
2221        }
2222
2223        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2224                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2225                if (err)
2226                        goto exit_unlock_free;
2227        }
2228
2230        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2231                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2232
2233                err = ovs_vport_set_upcall_portids(vport, ids);
2234                if (err)
2235                        goto exit_unlock_free;
2236        }
2237
2238        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2239                                      info->snd_portid, info->snd_seq, 0,
2240                                      OVS_VPORT_CMD_SET, GFP_KERNEL);
2241        BUG_ON(err < 0);
2242
2243        ovs_unlock();
2244        ovs_notify(&dp_vport_genl_family, reply, info);
2245        return 0;
2246
2247exit_unlock_free:
2248        ovs_unlock();
2249        kfree_skb(reply);
2250        return err;
2251}
2252
2253static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2254{
2255        bool update_headroom = false;
2256        struct nlattr **a = info->attrs;
2257        struct sk_buff *reply;
2258        struct datapath *dp;
2259        struct vport *vport;
2260        unsigned int new_headroom;
2261        int err;
2262
2263        reply = ovs_vport_cmd_alloc_info();
2264        if (!reply)
2265                return -ENOMEM;
2266
2267        ovs_lock();
2268        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2269        err = PTR_ERR(vport);
2270        if (IS_ERR(vport))
2271                goto exit_unlock_free;
2272
2273        if (vport->port_no == OVSP_LOCAL) {
2274                err = -EINVAL;
2275                goto exit_unlock_free;
2276        }
2277
2278        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2279                                      info->snd_portid, info->snd_seq, 0,
2280                                      OVS_VPORT_CMD_DEL, GFP_KERNEL);
2281        BUG_ON(err < 0);
2282
2283        /* The vport deletion may trigger a dp headroom update. */
2284        dp = vport->dp;
2285        if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2286                update_headroom = true;
2287
2288        netdev_reset_rx_headroom(vport->dev);
2289        ovs_dp_detach_port(vport);
2290
2291        if (update_headroom) {
2292                new_headroom = ovs_get_max_headroom(dp);
2293
2294                if (new_headroom < dp->max_headroom)
2295                        ovs_update_headroom(dp, new_headroom);
2296        }
2297        ovs_unlock();
2298
2299        ovs_notify(&dp_vport_genl_family, reply, info);
2300        return 0;
2301
2302exit_unlock_free:
2303        ovs_unlock();
2304        kfree_skb(reply);
2305        return err;
2306}
2307
2308static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2309{
2310        struct nlattr **a = info->attrs;
2311        struct ovs_header *ovs_header = info->userhdr;
2312        struct sk_buff *reply;
2313        struct vport *vport;
2314        int err;
2315
2316        reply = ovs_vport_cmd_alloc_info();
2317        if (!reply)
2318                return -ENOMEM;
2319
2320        rcu_read_lock();
2321        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2322        err = PTR_ERR(vport);
2323        if (IS_ERR(vport))
2324                goto exit_unlock_free;
2325        err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2326                                      info->snd_portid, info->snd_seq, 0,
2327                                      OVS_VPORT_CMD_GET, GFP_ATOMIC);
2328        BUG_ON(err < 0);
2329        rcu_read_unlock();
2330
2331        return genlmsg_reply(reply, info);
2332
2333exit_unlock_free:
2334        rcu_read_unlock();
2335        kfree_skb(reply);
2336        return err;
2337}
2338
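    /* Vport dumps keep a two-level cursor in the netlink callback:
     * cb->args[0] is the hash bucket and cb->args[1] the index within it,
     * so a dump that fills its skb mid-bucket resumes exactly where it
     * stopped.
     */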
2339static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2340{
2341        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2342        struct datapath *dp;
2343        int bucket = cb->args[0], skip = cb->args[1];
2344        int i, j = 0;
2345
2346        rcu_read_lock();
2347        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2348        if (!dp) {
2349                rcu_read_unlock();
2350                return -ENODEV;
2351        }
2352        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2353                struct vport *vport;
2354
2355                j = 0;
2356                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2357                        if (j >= skip &&
2358                            ovs_vport_cmd_fill_info(vport, skb,
2359                                                    sock_net(skb->sk),
2360                                                    NETLINK_CB(cb->skb).portid,
2361                                                    cb->nlh->nlmsg_seq,
2362                                                    NLM_F_MULTI,
2363                                                    OVS_VPORT_CMD_GET,
2364                                                    GFP_ATOMIC) < 0)
2365                                goto out;
2366
2367                        j++;
2368                }
2369                skip = 0;
2370        }
2371out:
2372        rcu_read_unlock();
2373
2374        cb->args[0] = i;
2375        cb->args[1] = j;
2376
2377        return skb->len;
2378}
2379
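    /* Periodic work item: rebalance the flow-mask cache of every datapath
     * in this netns, then re-arm. First scheduled from ovs_init_net() and
     * cancelled in ovs_exit_net().
     */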
2380static void ovs_dp_masks_rebalance(struct work_struct *work)
2381{
2382        struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2383                                               masks_rebalance.work);
2384        struct datapath *dp;
2385
2386        ovs_lock();
2387
2388        list_for_each_entry(dp, &ovs_net->dps, list_node)
2389                ovs_flow_masks_rebalance(&dp->table);
2390
2391        ovs_unlock();
2392
2393        schedule_delayed_work(&ovs_net->masks_rebalance,
2394                              msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2395}
2396
2397static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2398        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2399        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2400        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2401        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2402        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2403        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2404        [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2405        [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2406};
2407
2408static const struct genl_small_ops dp_vport_genl_ops[] = {
2409        { .cmd = OVS_VPORT_CMD_NEW,
2410          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2411          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2412          .doit = ovs_vport_cmd_new
2413        },
2414        { .cmd = OVS_VPORT_CMD_DEL,
2415          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2416          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2417          .doit = ovs_vport_cmd_del
2418        },
2419        { .cmd = OVS_VPORT_CMD_GET,
2420          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2421          .flags = 0,               /* OK for unprivileged users. */
2422          .doit = ovs_vport_cmd_get,
2423          .dumpit = ovs_vport_cmd_dump
2424        },
2425        { .cmd = OVS_VPORT_CMD_SET,
2426          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2427          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2428          .doit = ovs_vport_cmd_set,
2429        },
2430};
2431
2432struct genl_family dp_vport_genl_family __ro_after_init = {
2433        .hdrsize = sizeof(struct ovs_header),
2434        .name = OVS_VPORT_FAMILY,
2435        .version = OVS_VPORT_VERSION,
2436        .maxattr = OVS_VPORT_ATTR_MAX,
2437        .policy = vport_policy,
2438        .netnsok = true,
2439        .parallel_ops = true,
2440        .small_ops = dp_vport_genl_ops,
2441        .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
2442        .mcgrps = &ovs_dp_vport_multicast_group,
2443        .n_mcgrps = 1,
2444        .module = THIS_MODULE,
2445};
2446
2447static struct genl_family * const dp_genl_families[] = {
2448        &dp_datapath_genl_family,
2449        &dp_vport_genl_family,
2450        &dp_flow_genl_family,
2451        &dp_packet_genl_family,
2452        &dp_meter_genl_family,
2453#if     IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2454        &dp_ct_limit_genl_family,
2455#endif
2456};
2457
2458static void dp_unregister_genl(int n_families)
2459{
2460        int i;
2461
2462        for (i = 0; i < n_families; i++)
2463                genl_unregister_family(dp_genl_families[i]);
2464}
2465
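    /* Register every family in dp_genl_families[]. On failure, only the i
     * families registered so far are torn down, which is why the error
     * path calls dp_unregister_genl(i) rather than passing ARRAY_SIZE().
     */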
2466static int __init dp_register_genl(void)
2467{
2468        int err;
2469        int i;
2470
2471        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2472
2473                err = genl_register_family(dp_genl_families[i]);
2474                if (err)
2475                        goto error;
2476        }
2477
2478        return 0;
2479
2480error:
2481        dp_unregister_genl(i);
2482        return err;
2483}
2484
2485static int __net_init ovs_init_net(struct net *net)
2486{
2487        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2488        int err;
2489
2490        INIT_LIST_HEAD(&ovs_net->dps);
2491        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2492        INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
2493
2494        err = ovs_ct_init(net);
2495        if (err)
2496                return err;
2497
2498        schedule_delayed_work(&ovs_net->masks_rebalance,
2499                              msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2500        return 0;
2501}
2502
2503static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2504                                            struct list_head *head)
2505{
2506        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2507        struct datapath *dp;
2508
2509        list_for_each_entry(dp, &ovs_net->dps, list_node) {
2510                int i;
2511
2512                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2513                        struct vport *vport;
2514
2515                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2516                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2517                                        continue;
2518
2519                                if (dev_net(vport->dev) == dnet)
2520                                        list_add(&vport->detach_list, head);
2521                        }
2522                }
2523        }
2524}
2525
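    /* Namespace teardown. Everything up to the vport detach runs under
     * ovs_lock; the delayed rebalance work is cancelled only after the
     * lock is dropped, because the work item itself takes ovs_lock and
     * cancelling it synchronously under the lock would deadlock.
     */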
2526static void __net_exit ovs_exit_net(struct net *dnet)
2527{
2528        struct datapath *dp, *dp_next;
2529        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2530        struct vport *vport, *vport_next;
2531        struct net *net;
2532        LIST_HEAD(head);
2533
2534        ovs_lock();
2535
2536        ovs_ct_exit(dnet);
2537
2538        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2539                __dp_destroy(dp);
2540
2541        down_read(&net_rwsem);
2542        for_each_net(net)
2543                list_vports_from_net(net, dnet, &head);
2544        up_read(&net_rwsem);
2545
2546        /* Detach all vports from the given namespace. */
2547        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2548                list_del(&vport->detach_list);
2549                ovs_dp_detach_port(vport);
2550        }
2551
2552        ovs_unlock();
2553
2554        cancel_delayed_work_sync(&ovs_net->masks_rebalance);
2555        cancel_work_sync(&ovs_net->dp_notify_work);
2556}
2557
2558static struct pernet_operations ovs_net_ops = {
2559        .init = ovs_init_net,
2560        .exit = ovs_exit_net,
2561        .id   = &ovs_net_id,
2562        .size = sizeof(struct ovs_net),
2563};
2564
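    /* Module init. The BUILD_BUG_ON verifies at compile time that struct
     * ovs_skb_cb fits inside the skb control buffer (skb->cb) shared with
     * the rest of the stack.
     */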
2565static int __init dp_init(void)
2566{
2567        int err;
2568
2569        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
2570                     sizeof_field(struct sk_buff, cb));
2571
2572        pr_info("Open vSwitch switching datapath\n");
2573
2574        err = action_fifos_init();
2575        if (err)
2576                goto error;
2577
2578        err = ovs_internal_dev_rtnl_link_register();
2579        if (err)
2580                goto error_action_fifos_exit;
2581
2582        err = ovs_flow_init();
2583        if (err)
2584                goto error_unreg_rtnl_link;
2585
2586        err = ovs_vport_init();
2587        if (err)
2588                goto error_flow_exit;
2589
2590        err = register_pernet_device(&ovs_net_ops);
2591        if (err)
2592                goto error_vport_exit;
2593
2594        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2595        if (err)
2596                goto error_netns_exit;
2597
2598        err = ovs_netdev_init();
2599        if (err)
2600                goto error_unreg_notifier;
2601
2602        err = dp_register_genl();
2603        if (err < 0)
2604                goto error_unreg_netdev;
2605
2606        return 0;
2607
2608error_unreg_netdev:
2609        ovs_netdev_exit();
2610error_unreg_notifier:
2611        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2612error_netns_exit:
2613        unregister_pernet_device(&ovs_net_ops);
2614error_vport_exit:
2615        ovs_vport_exit();
2616error_flow_exit:
2617        ovs_flow_exit();
2618error_unreg_rtnl_link:
2619        ovs_internal_dev_rtnl_link_unregister();
2620error_action_fifos_exit:
2621        action_fifos_exit();
2622error:
2623        return err;
2624}
2625
2626static void dp_cleanup(void)
2627{
2628        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2629        ovs_netdev_exit();
2630        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2631        unregister_pernet_device(&ovs_net_ops);
2632        rcu_barrier();
2633        ovs_vport_exit();
2634        ovs_flow_exit();
2635        ovs_internal_dev_rtnl_link_unregister();
2636        action_fifos_exit();
2637}
2638
2639module_init(dp_init);
2640module_exit(dp_cleanup);
2641
2642MODULE_DESCRIPTION("Open vSwitch switching datapath");
2643MODULE_LICENSE("GPL");
2644MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2645MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2646MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2647MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2648MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2649MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2650