linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;
EXPORT_SYMBOL_GPL(ovs_net_id);

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};

/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on
 * vports, etc.) and writes to other state (flow table modifications,
 * setting miscellaneous datapath parameters, etc.) are protected by
 * ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
        struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        return vport->dp;
        }

        return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp;

        WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
        rcu_read_lock();
        dp = get_dp_rcu(net, dp_ifindex);
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return ovs_vport_name(vport);
}

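/* Return the ifindex of the datapath's local port, or 0 if there is none. */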
static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

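/* RCU callback: all readers are done with the datapath, so free it. */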
static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        kfree(dp);
}

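/* Map a port number to its bucket in the datapath's vport hash table. */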
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

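/* Datapath receive path: look up the flow matching 'key' and execute its
 * actions, or queue an upcall to userspace on a flow table miss.
 */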
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

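/* Software-segment a GSO packet and queue each resulting segment to
 * userspace as a separate upcall.
 */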
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb = segs;
        do {
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

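/* Estimate the Netlink message size needed for an upcall, including the
 * optional attributes carried in 'upcall_info'.
 */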
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(upcall_info->actions_len);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

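/* Zero-pad the message up to NLA_ALIGNTO unless userspace announced that
 * it accepts unaligned Netlink messages (OVS_DP_F_UNALIGNED).
 */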
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        memset(skb_put(skb, plen), 0, plen);
        }
}

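/* Build an OVS_PACKET_CMD_* Netlink message carrying 'skb' and its flow
 * key and unicast it to the userspace portid in 'upcall_info'.
 */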
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        struct genl_info info = {
                .dst_sk = ovs_dp_get_net(dp)->genl_sock,
                .snd_portid = upcall_info->portid,
        };
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen);
        user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru) {
                if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
                                upcall_info->mru)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Only reserve room for the attribute header; the packet data is
         * added in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len);

        err = skb_zerocopy(user_skb, skb, skb->len, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

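/* OVS_PACKET_CMD_EXECUTE: userspace supplies a packet, a flow key and a
 * list of actions; execute the actions on the packet as if it had been
 * received on the given input port.
 */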
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct ethhdr *eth;
        struct vport *input_vport;
        u16 mru = 0;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (eth_proto_is_802_3(eth->h_proto))
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};

static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};

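/* Fold the per-CPU datapath counters into 'stats' and 'mega_stats'. */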
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

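/* Estimate the Netlink message size for a flow, accounting for the
 * attributes that the UFID flags allow us to omit.
 */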
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

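/* OVS_FLOW_CMD_NEW: install a new flow, or update the actions of an
 * existing flow when the Netlink flags permit it.
 */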
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_key key;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, so do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        ovs_flow_mask_key(&new_flow->key, &key, true, &mask);

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &key, log);
        if (error)
                goto err_kfree_flow;

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(struct net *net,
                                                const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

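/* OVS_FLOW_CMD_SET: replace the actions of an existing flow and/or clear
 * its statistics (OVS_FLOW_ATTR_CLEAR).
 */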
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attribute not present in set flow.");
                goto error;
        }

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
                                        &mask, log);
                if (IS_ERR(acts)) {
                        error = PTR_ERR(acts);
                        goto error;
                }

                /* Can allocate before locking if have acts. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_NEW, false,
                                                ufid_flags);

                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions_rcu(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_NEW, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                        NULL, log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                                        &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

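/* Dump flows one flow table bucket at a time; cb->args[] carries the
 * (bucket, object) cursor between invocations.
 */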
1336static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1337{
1338        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1339        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1340        struct table_instance *ti;
1341        struct datapath *dp;
1342        u32 ufid_flags;
1343        int err;
1344
1345        err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
1346                            OVS_FLOW_ATTR_MAX, flow_policy);
1347        if (err)
1348                return err;
1349        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1350
1351        rcu_read_lock();
1352        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1353        if (!dp) {
1354                rcu_read_unlock();
1355                return -ENODEV;
1356        }
1357
1358        ti = rcu_dereference(dp->table.ti);
1359        for (;;) {
1360                struct sw_flow *flow;
1361                u32 bucket, obj;
1362
1363                bucket = cb->args[0];
1364                obj = cb->args[1];
1365                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1366                if (!flow)
1367                        break;
1368
1369                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1370                                           NETLINK_CB(cb->skb).portid,
1371                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
1372                                           OVS_FLOW_CMD_NEW, ufid_flags) < 0)
1373                        break;
1374
1375                cb->args[0] = bucket;
1376                cb->args[1] = obj;
1377        }
1378        rcu_read_unlock();
1379        return skb->len;
1380}
1381
1382static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1383        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1384        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1385        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1386        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1387        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1388        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1389        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1390};
1391
1392static const struct genl_ops dp_flow_genl_ops[] = {
1393        { .cmd = OVS_FLOW_CMD_NEW,
1394          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1395          .policy = flow_policy,
1396          .doit = ovs_flow_cmd_new
1397        },
1398        { .cmd = OVS_FLOW_CMD_DEL,
1399          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1400          .policy = flow_policy,
1401          .doit = ovs_flow_cmd_del
1402        },
1403        { .cmd = OVS_FLOW_CMD_GET,
1404          .flags = 0,               /* OK for unprivileged users. */
1405          .policy = flow_policy,
1406          .doit = ovs_flow_cmd_get,
1407          .dumpit = ovs_flow_cmd_dump
1408        },
1409        { .cmd = OVS_FLOW_CMD_SET,
1410          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1411          .policy = flow_policy,
1412          .doit = ovs_flow_cmd_set,
1413        },
1414};
1415
1416static struct genl_family dp_flow_genl_family = {
1417        .id = GENL_ID_GENERATE,
1418        .hdrsize = sizeof(struct ovs_header),
1419        .name = OVS_FLOW_FAMILY,
1420        .version = OVS_FLOW_VERSION,
1421        .maxattr = OVS_FLOW_ATTR_MAX,
1422        .netnsok = true,
1423        .parallel_ops = true,
1424        .ops = dp_flow_genl_ops,
1425        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1426        .mcgrps = &ovs_dp_flow_multicast_group,
1427        .n_mcgrps = 1,
1428};
1429
1430static size_t ovs_dp_cmd_msg_size(void)
1431{
1432        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1433
1434        msgsize += nla_total_size(IFNAMSIZ);
1435        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1436        msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
1437        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1438
1439        return msgsize;
1440}
1441
1442/* Called with ovs_mutex. */
1443static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1444                                u32 portid, u32 seq, u32 flags, u8 cmd)
1445{
1446        struct ovs_header *ovs_header;
1447        struct ovs_dp_stats dp_stats;
1448        struct ovs_dp_megaflow_stats dp_megaflow_stats;
1449        int err;
1450
1451        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1452                                   flags, cmd);
1453        if (!ovs_header)
1454                goto error;
1455
1456        ovs_header->dp_ifindex = get_dpifindex(dp);
1457
1458        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1459        if (err)
1460                goto nla_put_failure;
1461
1462        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1463        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1464                        &dp_stats))
1465                goto nla_put_failure;
1466
1467        if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1468                        sizeof(struct ovs_dp_megaflow_stats),
1469                        &dp_megaflow_stats))
1470                goto nla_put_failure;
1471
1472        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1473                goto nla_put_failure;
1474
1475        genlmsg_end(skb, ovs_header);
1476        return 0;
1477
1478nla_put_failure:
1479        genlmsg_cancel(skb, ovs_header);
1480error:
1481        return -EMSGSIZE;
1482}
1483
1484static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
1485{
1486        return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
1487}
1488
1489/* Called with rcu_read_lock or ovs_mutex. */
1490static struct datapath *lookup_datapath(struct net *net,
1491                                        const struct ovs_header *ovs_header,
1492                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1493{
1494        struct datapath *dp;
1495
1496        if (!a[OVS_DP_ATTR_NAME])
1497                dp = get_dp(net, ovs_header->dp_ifindex);
1498        else {
1499                struct vport *vport;
1500
1501                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1502                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1503        }
1504        return dp ? dp : ERR_PTR(-ENODEV);
1505}
1506
1507static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1508{
1509        struct datapath *dp;
1510
1511        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1512        if (IS_ERR(dp))
1513                return;
1514
1515        WARN(dp->user_features, "Dropping previously announced user features\n");
1516        dp->user_features = 0;
1517}
1518
1519static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1520{
1521        if (a[OVS_DP_ATTR_USER_FEATURES])
1522                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1523}
1524
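/* Datapath creation: the reply skb and all of the datapath's state are
 * allocated up front, outside ovs_mutex; the lock is taken only to create
 * the local vport and publish the datapath on the per-netns list.
 */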
1525static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1526{
1527        struct nlattr **a = info->attrs;
1528        struct vport_parms parms;
1529        struct sk_buff *reply;
1530        struct datapath *dp;
1531        struct vport *vport;
1532        struct ovs_net *ovs_net;
1533        int err, i;
1534
1535        err = -EINVAL;
1536        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1537                goto err;
1538
1539        reply = ovs_dp_cmd_alloc_info(info);
1540        if (!reply)
1541                return -ENOMEM;
1542
1543        err = -ENOMEM;
1544        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1545        if (dp == NULL)
1546                goto err_free_reply;
1547
1548        ovs_dp_set_net(dp, sock_net(skb->sk));
1549
1550        /* Allocate table. */
1551        err = ovs_flow_tbl_init(&dp->table);
1552        if (err)
1553                goto err_free_dp;
1554
1555        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1556        if (!dp->stats_percpu) {
1557                err = -ENOMEM;
1558                goto err_destroy_table;
1559        }
1560
1561        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1562                            GFP_KERNEL);
1563        if (!dp->ports) {
1564                err = -ENOMEM;
1565                goto err_destroy_percpu;
1566        }
1567
1568        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1569                INIT_HLIST_HEAD(&dp->ports[i]);
1570
1571        /* Set up our datapath device. */
1572        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1573        parms.type = OVS_VPORT_TYPE_INTERNAL;
1574        parms.options = NULL;
1575        parms.dp = dp;
1576        parms.port_no = OVSP_LOCAL;
1577        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1578
1579        ovs_dp_change(dp, a);
1580
1581        /* So far only local changes have been made; now we need the lock. */
1582        ovs_lock();
1583
1584        vport = new_vport(&parms);
1585        if (IS_ERR(vport)) {
1586                err = PTR_ERR(vport);
1587                if (err == -EBUSY)
1588                        err = -EEXIST;
1589
1590                if (err == -EEXIST) {
1591                        /* An outdated user space instance that does not understand
1592                         * the concept of user_features has attempted to create a new
1593                         * datapath and is likely to reuse it. Drop all user features.
1594                         */
1595                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1596                                ovs_dp_reset_user_features(skb, info);
1597                }
1598
1599                goto err_destroy_ports_array;
1600        }
1601
1602        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1603                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1604        BUG_ON(err < 0);
1605
1606        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1607        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1608
1609        ovs_unlock();
1610
1611        ovs_notify(&dp_datapath_genl_family, reply, info);
1612        return 0;
1613
1614err_destroy_ports_array:
1615        ovs_unlock();
1616        kfree(dp->ports);
1617err_destroy_percpu:
1618        free_percpu(dp->stats_percpu);
1619err_destroy_table:
1620        ovs_flow_tbl_destroy(&dp->table);
1621err_free_dp:
1622        kfree(dp);
1623err_free_reply:
1624        kfree_skb(reply);
1625err:
1626        return err;
1627}
1628
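/* Illustrative only, not part of this file: a minimal userspace request
 * that would reach ovs_dp_cmd_new() above must carry OVS_DP_ATTR_NAME and
 * OVS_DP_ATTR_UPCALL_PID.  A sketch using libnl-3, where "sock" and
 * "family" -- a connected genl socket and the resolved OVS_DATAPATH_FAMILY
 * id -- are hypothetical names:
 *
 *	struct nl_msg *msg = nlmsg_alloc();
 *	struct ovs_header *hdr;
 *
 *	hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *			  sizeof(struct ovs_header),
 *			  NLM_F_REQUEST | NLM_F_ECHO,
 *			  OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
 *	hdr->dp_ifindex = 0;
 *	nla_put_string(msg, OVS_DP_ATTR_NAME, "dp0");
 *	nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID, getpid());
 *	nl_send_auto(sock, msg);
 *
 * dp_ifindex is not consulted for CMD_NEW, and NLM_F_ECHO asks the kernel
 * to send the reply built by ovs_dp_cmd_fill_info() back to the requester.
 */
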
1629/* Called with ovs_mutex. */
1630static void __dp_destroy(struct datapath *dp)
1631{
1632        int i;
1633
1634        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1635                struct vport *vport;
1636                struct hlist_node *n;
1637
1638                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1639                        if (vport->port_no != OVSP_LOCAL)
1640                                ovs_dp_detach_port(vport);
1641        }
1642
1643        list_del_rcu(&dp->list_node);
1644
1645        /* OVSP_LOCAL is the datapath internal port.  We need to make sure that
1646         * all ports in the datapath are destroyed before the datapath is freed.
1647         */
1648        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1649
1650        /* Destroy the flow table after an RCU grace period. */
1651        call_rcu(&dp->rcu, destroy_dp_rcu);
1652}
1653
1654static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1655{
1656        struct sk_buff *reply;
1657        struct datapath *dp;
1658        int err;
1659
1660        reply = ovs_dp_cmd_alloc_info(info);
1661        if (!reply)
1662                return -ENOMEM;
1663
1664        ovs_lock();
1665        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1666        err = PTR_ERR(dp);
1667        if (IS_ERR(dp))
1668                goto err_unlock_free;
1669
1670        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1671                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
1672        BUG_ON(err < 0);
1673
1674        __dp_destroy(dp);
1675        ovs_unlock();
1676
1677        ovs_notify(&dp_datapath_genl_family, reply, info);
1678
1679        return 0;
1680
1681err_unlock_free:
1682        ovs_unlock();
1683        kfree_skb(reply);
1684        return err;
1685}
1686
1687static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1688{
1689        struct sk_buff *reply;
1690        struct datapath *dp;
1691        int err;
1692
1693        reply = ovs_dp_cmd_alloc_info(info);
1694        if (!reply)
1695                return -ENOMEM;
1696
1697        ovs_lock();
1698        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1699        err = PTR_ERR(dp);
1700        if (IS_ERR(dp))
1701                goto err_unlock_free;
1702
1703        ovs_dp_change(dp, info->attrs);
1704
1705        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1706                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1707        BUG_ON(err < 0);
1708
1709        ovs_unlock();
1710        ovs_notify(&dp_datapath_genl_family, reply, info);
1711
1712        return 0;
1713
1714err_unlock_free:
1715        ovs_unlock();
1716        kfree_skb(reply);
1717        return err;
1718}
1719
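/* Note that both ovs_dp_cmd_set() above and ovs_dp_cmd_get() below reply
 * with OVS_DP_CMD_NEW: the reply is a full description of the (possibly
 * updated) datapath rather than an echo of the request's command.
 */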
1720static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1721{
1722        struct sk_buff *reply;
1723        struct datapath *dp;
1724        int err;
1725
1726        reply = ovs_dp_cmd_alloc_info(info);
1727        if (!reply)
1728                return -ENOMEM;
1729
1730        ovs_lock();
1731        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1732        if (IS_ERR(dp)) {
1733                err = PTR_ERR(dp);
1734                goto err_unlock_free;
1735        }
1736        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1737                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1738        BUG_ON(err < 0);
1739        ovs_unlock();
1740
1741        return genlmsg_reply(reply, info);
1742
1743err_unlock_free:
1744        ovs_unlock();
1745        kfree_skb(reply);
1746        return err;
1747}
1748
1749static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1750{
1751        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1752        struct datapath *dp;
1753        int skip = cb->args[0];
1754        int i = 0;
1755
1756        ovs_lock();
1757        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1758                if (i >= skip &&
1759                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1760                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1761                                         OVS_DP_CMD_NEW) < 0)
1762                        break;
1763                i++;
1764        }
1765        ovs_unlock();
1766
1767        cb->args[0] = i;
1768
1769        return skb->len;
1770}
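
/* Datapath dumps are resumable: cb->args[0] records how many entries have
 * already been emitted, so the next callback skips them and continues
 * filling from where the previous skb ran out of room.
 */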
1771
1772static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1773        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1774        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1775        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1776};
1777
1778static const struct genl_ops dp_datapath_genl_ops[] = {
1779        { .cmd = OVS_DP_CMD_NEW,
1780          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1781          .policy = datapath_policy,
1782          .doit = ovs_dp_cmd_new
1783        },
1784        { .cmd = OVS_DP_CMD_DEL,
1785          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1786          .policy = datapath_policy,
1787          .doit = ovs_dp_cmd_del
1788        },
1789        { .cmd = OVS_DP_CMD_GET,
1790          .flags = 0,               /* OK for unprivileged users. */
1791          .policy = datapath_policy,
1792          .doit = ovs_dp_cmd_get,
1793          .dumpit = ovs_dp_cmd_dump
1794        },
1795        { .cmd = OVS_DP_CMD_SET,
1796          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1797          .policy = datapath_policy,
1798          .doit = ovs_dp_cmd_set,
1799        },
1800};
1801
1802static struct genl_family dp_datapath_genl_family = {
1803        .id = GENL_ID_GENERATE,
1804        .hdrsize = sizeof(struct ovs_header),
1805        .name = OVS_DATAPATH_FAMILY,
1806        .version = OVS_DATAPATH_VERSION,
1807        .maxattr = OVS_DP_ATTR_MAX,
1808        .netnsok = true,
1809        .parallel_ops = true,
1810        .ops = dp_datapath_genl_ops,
1811        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1812        .mcgrps = &ovs_dp_datapath_multicast_group,
1813        .n_mcgrps = 1,
1814};
1815
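/* The OVS families set parallel_ops, so generic netlink does not serialize
 * their handlers behind the global genl mutex; mutual exclusion comes from
 * ovs_mutex (and RCU on the read-side paths) instead.
 */
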
1816/* Called with ovs_mutex or RCU read lock. */
1817static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1818                                   u32 portid, u32 seq, u32 flags, u8 cmd)
1819{
1820        struct ovs_header *ovs_header;
1821        struct ovs_vport_stats vport_stats;
1822        int err;
1823
1824        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1825                                 flags, cmd);
1826        if (!ovs_header)
1827                return -EMSGSIZE;
1828
1829        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1830
1831        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1832            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1833            nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1834                           ovs_vport_name(vport)))
1835                goto nla_put_failure;
1836
1837        ovs_vport_get_stats(vport, &vport_stats);
1838        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1839                    &vport_stats))
1840                goto nla_put_failure;
1841
1842        if (ovs_vport_get_upcall_portids(vport, skb))
1843                goto nla_put_failure;
1844
1845        err = ovs_vport_get_options(vport, skb);
1846        if (err == -EMSGSIZE)
1847                goto error;
1848
1849        genlmsg_end(skb, ovs_header);
1850        return 0;
1851
1852nla_put_failure:
1853        err = -EMSGSIZE;
1854error:
1855        genlmsg_cancel(skb, ovs_header);
1856        return err;
1857}
1858
1859static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1860{
1861        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1862}
1863
1864/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1865struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1866                                         u32 seq, u8 cmd)
1867{
1868        struct sk_buff *skb;
1869        int retval;
1870
1871        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1872        if (!skb)
1873                return ERR_PTR(-ENOMEM);
1874
1875        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1876        BUG_ON(retval < 0);
1877
1878        return skb;
1879}
1880
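/* Vport lookup precedence: a name, when supplied, wins and must agree with
 * any dp_ifindex in the header; otherwise the port number is resolved
 * within the datapath named by dp_ifindex; a request carrying neither
 * attribute is rejected with -EINVAL.
 */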
1881/* Called with ovs_mutex or RCU read lock. */
1882static struct vport *lookup_vport(struct net *net,
1883                                  const struct ovs_header *ovs_header,
1884                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1885{
1886        struct datapath *dp;
1887        struct vport *vport;
1888
1889        if (a[OVS_VPORT_ATTR_NAME]) {
1890                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1891                if (!vport)
1892                        return ERR_PTR(-ENODEV);
1893                if (ovs_header->dp_ifindex &&
1894                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1895                        return ERR_PTR(-ENODEV);
1896                return vport;
1897        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1898                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1899
1900                if (port_no >= DP_MAX_PORTS)
1901                        return ERR_PTR(-EFBIG);
1902
1903                dp = get_dp(net, ovs_header->dp_ifindex);
1904                if (!dp)
1905                        return ERR_PTR(-ENODEV);
1906
1907                vport = ovs_vport_ovsl_rcu(dp, port_no);
1908                if (!vport)
1909                        return ERR_PTR(-ENODEV);
1910                return vport;
1911        } else
1912                return ERR_PTR(-EINVAL);
1913}
1914
1915static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1916{
1917        struct nlattr **a = info->attrs;
1918        struct ovs_header *ovs_header = info->userhdr;
1919        struct vport_parms parms;
1920        struct sk_buff *reply;
1921        struct vport *vport;
1922        struct datapath *dp;
1923        u32 port_no;
1924        int err;
1925
1926        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1927            !a[OVS_VPORT_ATTR_UPCALL_PID])
1928                return -EINVAL;
1929
1930        port_no = a[OVS_VPORT_ATTR_PORT_NO]
1931                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1932        if (port_no >= DP_MAX_PORTS)
1933                return -EFBIG;
1934
1935        reply = ovs_vport_cmd_alloc_info();
1936        if (!reply)
1937                return -ENOMEM;
1938
1939        ovs_lock();
1940restart:
1941        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1942        err = -ENODEV;
1943        if (!dp)
1944                goto exit_unlock_free;
1945
1946        if (port_no) {
1947                vport = ovs_vport_ovsl(dp, port_no);
1948                err = -EBUSY;
1949                if (vport)
1950                        goto exit_unlock_free;
1951        } else {
1952                for (port_no = 1; ; port_no++) {
1953                        if (port_no >= DP_MAX_PORTS) {
1954                                err = -EFBIG;
1955                                goto exit_unlock_free;
1956                        }
1957                        vport = ovs_vport_ovsl(dp, port_no);
1958                        if (!vport)
1959                                break;
1960                }
1961        }
1962
1963        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1964        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1965        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1966        parms.dp = dp;
1967        parms.port_no = port_no;
1968        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
1969
1970        vport = new_vport(&parms);
1971        err = PTR_ERR(vport);
1972        if (IS_ERR(vport)) {
1973                if (err == -EAGAIN)
1974                        goto restart;
1975                goto exit_unlock_free;
1976        }
1977
1978        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1979                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1980        BUG_ON(err < 0);
1981        ovs_unlock();
1982
1983        ovs_notify(&dp_vport_genl_family, reply, info);
1984        return 0;
1985
1986exit_unlock_free:
1987        ovs_unlock();
1988        kfree_skb(reply);
1989        return err;
1990}
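
/* The restart label above handles new_vport() returning -EAGAIN, which the
 * vport layer uses after loading a vport module on demand (ovs_mutex is
 * dropped around the module load, so the datapath must be looked up again).
 */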
1991
1992static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1993{
1994        struct nlattr **a = info->attrs;
1995        struct sk_buff *reply;
1996        struct vport *vport;
1997        int err;
1998
1999        reply = ovs_vport_cmd_alloc_info();
2000        if (!reply)
2001                return -ENOMEM;
2002
2003        ovs_lock();
2004        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2005        err = PTR_ERR(vport);
2006        if (IS_ERR(vport))
2007                goto exit_unlock_free;
2008
2009        if (a[OVS_VPORT_ATTR_TYPE] &&
2010            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2011                err = -EINVAL;
2012                goto exit_unlock_free;
2013        }
2014
2015        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2016                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2017                if (err)
2018                        goto exit_unlock_free;
2019        }
2020
2022        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2023                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2024
2025                err = ovs_vport_set_upcall_portids(vport, ids);
2026                if (err)
2027                        goto exit_unlock_free;
2028        }
2029
2030        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2031                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2032        BUG_ON(err < 0);
2033
2034        ovs_unlock();
2035        ovs_notify(&dp_vport_genl_family, reply, info);
2036        return 0;
2037
2038exit_unlock_free:
2039        ovs_unlock();
2040        kfree_skb(reply);
2041        return err;
2042}
2043
2044static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2045{
2046        struct nlattr **a = info->attrs;
2047        struct sk_buff *reply;
2048        struct vport *vport;
2049        int err;
2050
2051        reply = ovs_vport_cmd_alloc_info();
2052        if (!reply)
2053                return -ENOMEM;
2054
2055        ovs_lock();
2056        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2057        err = PTR_ERR(vport);
2058        if (IS_ERR(vport))
2059                goto exit_unlock_free;
2060
2061        if (vport->port_no == OVSP_LOCAL) {
2062                err = -EINVAL;
2063                goto exit_unlock_free;
2064        }
2065
2066        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2067                                      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2068        BUG_ON(err < 0);
2069        ovs_dp_detach_port(vport);
2070        ovs_unlock();
2071
2072        ovs_notify(&dp_vport_genl_family, reply, info);
2073        return 0;
2074
2075exit_unlock_free:
2076        ovs_unlock();
2077        kfree_skb(reply);
2078        return err;
2079}
2080
2081static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2082{
2083        struct nlattr **a = info->attrs;
2084        struct ovs_header *ovs_header = info->userhdr;
2085        struct sk_buff *reply;
2086        struct vport *vport;
2087        int err;
2088
2089        reply = ovs_vport_cmd_alloc_info();
2090        if (!reply)
2091                return -ENOMEM;
2092
2093        rcu_read_lock();
2094        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2095        err = PTR_ERR(vport);
2096        if (IS_ERR(vport))
2097                goto exit_unlock_free;
2098        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2099                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2100        BUG_ON(err < 0);
2101        rcu_read_unlock();
2102
2103        return genlmsg_reply(reply, info);
2104
2105exit_unlock_free:
2106        rcu_read_unlock();
2107        kfree_skb(reply);
2108        return err;
2109}
2110
2111static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2112{
2113        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2114        struct datapath *dp;
2115        int bucket = cb->args[0], skip = cb->args[1];
2116        int i, j = 0;
2117
2118        rcu_read_lock();
2119        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2120        if (!dp) {
2121                rcu_read_unlock();
2122                return -ENODEV;
2123        }
2124        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2125                struct vport *vport;
2126
2127                j = 0;
2128                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2129                        if (j >= skip &&
2130                            ovs_vport_cmd_fill_info(vport, skb,
2131                                                    NETLINK_CB(cb->skb).portid,
2132                                                    cb->nlh->nlmsg_seq,
2133                                                    NLM_F_MULTI,
2134                                                    OVS_VPORT_CMD_NEW) < 0)
2135                                goto out;
2136
2137                        j++;
2138                }
2139                skip = 0;
2140        }
2141out:
2142        rcu_read_unlock();
2143
2144        cb->args[0] = i;
2145        cb->args[1] = j;
2146
2147        return skb->len;
2148}
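
/* Vport dumps resume from (hash bucket, offset within bucket), kept in
 * cb->args[0] and cb->args[1].  The walk is RCU-only, so a dump racing
 * with port creation or deletion may skip or repeat entries, as is usual
 * for RCU-based netlink dumps.
 */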
2149
2150static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2151        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2152        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2153        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2154        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2155        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2156        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2157};
2158
2159static const struct genl_ops dp_vport_genl_ops[] = {
2160        { .cmd = OVS_VPORT_CMD_NEW,
2161          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2162          .policy = vport_policy,
2163          .doit = ovs_vport_cmd_new
2164        },
2165        { .cmd = OVS_VPORT_CMD_DEL,
2166          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2167          .policy = vport_policy,
2168          .doit = ovs_vport_cmd_del
2169        },
2170        { .cmd = OVS_VPORT_CMD_GET,
2171          .flags = 0,               /* OK for unprivileged users. */
2172          .policy = vport_policy,
2173          .doit = ovs_vport_cmd_get,
2174          .dumpit = ovs_vport_cmd_dump
2175        },
2176        { .cmd = OVS_VPORT_CMD_SET,
2177          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2178          .policy = vport_policy,
2179          .doit = ovs_vport_cmd_set,
2180        },
2181};
2182
2183struct genl_family dp_vport_genl_family = {
2184        .id = GENL_ID_GENERATE,
2185        .hdrsize = sizeof(struct ovs_header),
2186        .name = OVS_VPORT_FAMILY,
2187        .version = OVS_VPORT_VERSION,
2188        .maxattr = OVS_VPORT_ATTR_MAX,
2189        .netnsok = true,
2190        .parallel_ops = true,
2191        .ops = dp_vport_genl_ops,
2192        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2193        .mcgrps = &ovs_dp_vport_multicast_group,
2194        .n_mcgrps = 1,
2195};
2196
2197static struct genl_family * const dp_genl_families[] = {
2198        &dp_datapath_genl_family,
2199        &dp_vport_genl_family,
2200        &dp_flow_genl_family,
2201        &dp_packet_genl_family,
2202};
2203
2204static void dp_unregister_genl(int n_families)
2205{
2206        int i;
2207
2208        for (i = 0; i < n_families; i++)
2209                genl_unregister_family(dp_genl_families[i]);
2210}
2211
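/* Register every family in dp_genl_families[]; on failure, unwind only the
 * families that were registered before the one that failed.
 */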
2212static int dp_register_genl(void)
2213{
2214        int err;
2215        int i;
2216
2217        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2219                err = genl_register_family(dp_genl_families[i]);
2220                if (err)
2221                        goto error;
2222        }
2223
2224        return 0;
2225
2226error:
2227        dp_unregister_genl(i);
2228        return err;
2229}
2230
2231static int __net_init ovs_init_net(struct net *net)
2232{
2233        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2234
2235        INIT_LIST_HEAD(&ovs_net->dps);
2236        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2237        ovs_ct_init(net);
2238        return 0;
2239}
2240
2241static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2242                                            struct list_head *head)
2243{
2244        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2245        struct datapath *dp;
2246
2247        list_for_each_entry(dp, &ovs_net->dps, list_node) {
2248                int i;
2249
2250                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2251                        struct vport *vport;
2252
2253                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2254                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2255                                        continue;
2256
2257                                if (dev_net(vport->dev) == dnet)
2258                                        list_add(&vport->detach_list, head);
2259                        }
2260                }
2261        }
2262}
2263
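/* Tear down a dying namespace: destroy its own datapaths, then detach
 * internal-device vports in *other* namespaces whose backing netdev lives
 * in the dying one (collected by list_vports_from_net() above), and finally
 * flush the deferred notification work.
 */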
2264static void __net_exit ovs_exit_net(struct net *dnet)
2265{
2266        struct datapath *dp, *dp_next;
2267        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2268        struct vport *vport, *vport_next;
2269        struct net *net;
2270        LIST_HEAD(head);
2271
2272        ovs_ct_exit(dnet);
2273        ovs_lock();
2274        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2275                __dp_destroy(dp);
2276
2277        rtnl_lock();
2278        for_each_net(net)
2279                list_vports_from_net(net, dnet, &head);
2280        rtnl_unlock();
2281
2282        /* Detach all vports from the given namespace. */
2283        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2284                list_del(&vport->detach_list);
2285                ovs_dp_detach_port(vport);
2286        }
2287
2288        ovs_unlock();
2289
2290        cancel_work_sync(&ovs_net->dp_notify_work);
2291}
2292
2293static struct pernet_operations ovs_net_ops = {
2294        .init = ovs_init_net,
2295        .exit = ovs_exit_net,
2296        .id   = &ovs_net_id,
2297        .size = sizeof(struct ovs_net),
2298};
2299
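/* Module init: each subsystem is brought up in dependency order; the error
 * path below unwinds in exact reverse, so a failure at any step leaves
 * nothing registered.
 */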
2300static int __init dp_init(void)
2301{
2302        int err;
2303
2304        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2305
2306        pr_info("Open vSwitch switching datapath\n");
2307
2308        err = action_fifos_init();
2309        if (err)
2310                goto error;
2311
2312        err = ovs_internal_dev_rtnl_link_register();
2313        if (err)
2314                goto error_action_fifos_exit;
2315
2316        err = ovs_flow_init();
2317        if (err)
2318                goto error_unreg_rtnl_link;
2319
2320        err = ovs_vport_init();
2321        if (err)
2322                goto error_flow_exit;
2323
2324        err = register_pernet_device(&ovs_net_ops);
2325        if (err)
2326                goto error_vport_exit;
2327
2328        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2329        if (err)
2330                goto error_netns_exit;
2331
2332        err = ovs_netdev_init();
2333        if (err)
2334                goto error_unreg_notifier;
2335
2336        err = dp_register_genl();
2337        if (err < 0)
2338                goto error_unreg_netdev;
2339
2340        return 0;
2341
2342error_unreg_netdev:
2343        ovs_netdev_exit();
2344error_unreg_notifier:
2345        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2346error_netns_exit:
2347        unregister_pernet_device(&ovs_net_ops);
2348error_vport_exit:
2349        ovs_vport_exit();
2350error_flow_exit:
2351        ovs_flow_exit();
2352error_unreg_rtnl_link:
2353        ovs_internal_dev_rtnl_link_unregister();
2354error_action_fifos_exit:
2355        action_fifos_exit();
2356error:
2357        return err;
2358}
2359
2360static void dp_cleanup(void)
2361{
2362        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2363        ovs_netdev_exit();
2364        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2365        unregister_pernet_device(&ovs_net_ops);
2366        rcu_barrier();
2367        ovs_vport_exit();
2368        ovs_flow_exit();
2369        ovs_internal_dev_rtnl_link_unregister();
2370        action_fifos_exit();
2371}
2372
2373module_init(dp_init);
2374module_exit(dp_cleanup);
2375
2376MODULE_DESCRIPTION("Open vSwitch switching datapath");
2377MODULE_LICENSE("GPL");
2378