linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP
};

struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP
};

/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_info *info,
                            const struct genl_multicast_group *grp)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
                netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
}

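/* Send 'skb' to the family's first multicast group and, if the request in
 * 'info' set NLM_F_ECHO, also unicast it back to the requester. */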
static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, genl_info_net(info), info->snd_portid,
                    0, info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and to other state (flow table modifications, setting
 * miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

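/* A minimal usage sketch (hypothetical caller, not part of this file):
 * writers bracket state changes with the global mutex, readers use RCU:
 *
 *      ovs_lock();
 *      // add/remove vports, modify dp->table, ...
 *      ovs_unlock();
 *
 *      rcu_read_lock();
 *      // lookups such as get_dp() or ovs_lookup_vport()
 *      rcu_read_unlock();
 */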
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, dp_ifindex);
        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
static const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = netdev_vport_priv(local)->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

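/* Map 'port_no' to its bucket in the datapath's vport hash table. */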
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
        struct sw_flow_key key;
        u64 *stats_counter;
        u32 n_mask_hit;
        int error;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Extract flow from 'skb' into 'key'. */
        error = ovs_flow_extract(skb, p->port_no, &key);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
        }

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.key = &key;
                upcall.userdata = NULL;
                upcall.portid = p->upcall_portid;
                ovs_dp_upcall(dp, skb, &upcall);
                consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        OVS_CB(skb)->flow = flow;
        OVS_CB(skb)->pkt_key = &key;

        ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
        ovs_execute_actions(dp, skb);
        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

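/* Queue 'skb' for the userspace handler identified by upcall_info->portid,
 * segmenting GSO packets first and accounting a lost packet if delivery
 * fails. */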
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, upcall_info);
        else
                err = queue_gso_packets(dp, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(dp, skb, upcall_info);
                if (err)
                        break;

                if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for the first fragment, so we need
                         * to properly mark later fragments.
                         */
                        later_key = *upcall_info->key;
                        later_key.ip.frag = OVS_FRAG_TYPE_LATER;

                        later_info = *upcall_info;
                        later_info.key = &later_key;
                        upcall_info = &later_info;
                }
        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

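/* Upper bound on the serialized size of a flow key, used below when sizing
 * upcall and flow reply messages before they are built. */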
static size_t key_attr_size(void)
{
        return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
                + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
                  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
                  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
                  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
                + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
                + nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static size_t upcall_msg_size(const struct nlattr *userdata,
                              unsigned int hdrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (userdata)
                size += NLA_ALIGN(userdata->nla_len);

        return size;
}

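/* Build an upcall message for 'skb' (flow key, optional userdata and the
 * packet itself) and unicast it to the Netlink port in 'upcall_info'. */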
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb; /* to be queued to userspace */
        struct nlattr *nla;
        struct genl_info info = {
                .dst_sk = ovs_dp_get_net(dp)->genl_sock,
                .snd_portid = upcall_info->portid,
        };
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (vlan_tx_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                nskb->vlan_tci = 0;
                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info->userdata, hlen);
        user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
        ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len);

        err = skb_zerocopy(user_skb, skb, skb->len, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;

                if (plen > 0)
                        memset(skb_put(user_skb, plen), 0, plen);
        }

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(nskb);
        return err;
}

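/* Handler for OVS_PACKET_CMD_EXECUTE: reconstruct the packet supplied by
 * userspace, build a temporary flow around it and run the given actions
 * against the datapath. */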
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        int len;
        int err;

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_extract(packet, -1, &flow->key);
        if (err)
                goto err_flow_free;

        err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;
        acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;

        err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, 0, &acts);
        rcu_assign_pointer(flow->sf_acts, acts);
        if (err)
                goto err_flow_free;

        OVS_CB(packet)->flow = flow;
        OVS_CB(packet)->pkt_key = &flow->key;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        local_bh_disable();
        err = ovs_execute_actions(dp, packet);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};

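/* Fill 'stats' and 'mega_stats' for 'dp': flow and mask table sizes plus
 * the per-CPU hit/miss/lost counters, each CPU's counters read as a
 * consistent snapshot via its u64_stats seqcount. */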
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
        return NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
                + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
                + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        struct nlattr *start;
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;
        struct ovs_header *ovs_header;
        struct nlattr *nla;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        /* Fill flow key. */
        nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;

        err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
        if (!nla)
                goto nla_put_failure;

        err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
        if (err)
                goto error;

        nla_nest_end(skb, nla);

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                goto nla_put_failure;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
                goto nla_put_failure;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                goto nla_put_failure;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                goto error;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len)
                goto nla_put_failure;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               struct genl_info *info,
                                               bool always)
{
        struct sk_buff *skb;

        if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
                return NULL;

        skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
                                      always);
        if (!skb || IS_ERR(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd);
        BUG_ON(retval < 0);
        return skb;
}

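/* Handler for OVS_FLOW_CMD_NEW: install a new flow, or update the actions
 * of an existing one when the request flags permit it.  The flow and the
 * reply skb are allocated before taking ovs_lock so the locked section
 * stays short. */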
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        int error;

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY])
                goto error;
        if (!a[OVS_FLOW_ATTR_ACTIONS])
                goto error;

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &new_flow->unmasked_key, &mask);
        error = ovs_nla_get_match(&match,
                                  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
        if (error)
                goto err_kfree_flow;

        ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);

        /* Validate actions. */
        acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
        error = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_kfree_flow;

        error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
                                     0, &acts);
        if (error) {
                OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
                goto err_kfree_acts;
        }

        reply = ovs_flow_cmd_alloc_info(acts, info, false);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check if this is a duplicate flow */
        flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The unmasked key has to be the same for flow updates. */
                if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
                        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

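/* Handler for OVS_FLOW_CMD_SET: update the actions and/or clear the stats
 * of an existing flow. */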
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key, masked_key;
        struct sw_flow *flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        int error;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY])
                goto error;

        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match,
                                  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error;

                ovs_flow_mask_key(&masked_key, &key, &mask);
                error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
                                             &masked_key, 0, &acts);
                if (error) {
                        OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
                        goto err_kfree_acts;
                }
        }

        /* Can allocate before locking if have acts. */
        if (acts) {
                reply = ovs_flow_cmd_alloc_info(acts, info, false);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_NEW, false);
                if (unlikely(IS_ERR(reply))) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        int err;

        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
                return -EINVAL;
        }

        ovs_match_init(&match, &key, NULL);
        err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_NEW, true);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        int err;

        if (likely(a[OVS_FLOW_ATTR_KEY])) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                                        info, false);
        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL);
                        rcu_read_unlock();
                        BUG_ON(err < 0);

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

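/* Flow table dump: iterates the table in (bucket, obj) order, resuming
 * from the position saved in cb->args[] across netlink dump callbacks. */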
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_flow_genl_ops,
        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
};

static size_t ovs_dp_cmd_msg_size(void)
{
        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

        msgsize += nla_total_size(IFNAMSIZ);
        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
        msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */

        return msgsize;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        struct ovs_dp_megaflow_stats dp_megaflow_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                   flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
                        &dp_stats))
                goto nla_put_failure;

        if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
                        sizeof(struct ovs_dp_megaflow_stats),
                        &dp_megaflow_stats))
                goto nla_put_failure;

        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
                goto nla_put_failure;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
        return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
                                        struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
{
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return;

        WARN(dp->user_features, "Dropping previously announced user features\n");
        dp->user_features = 0;
}

static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
{
        if (a[OVS_DP_ATTR_USER_FEATURES])
                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
}

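/* Handler for OVS_DP_CMD_NEW: allocate the datapath, its flow table,
 * per-CPU stats and vport hash table, then create the OVSP_LOCAL internal
 * vport under ovs_lock. */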
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        struct ovs_net *ovs_net;
        int err, i;

        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_free_reply;

        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

        /* Allocate table. */
        err = ovs_flow_tbl_init(&dp->table);
        if (err)
                goto err_free_dp;

        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_table;
        }

        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
                            GFP_KERNEL);
        if (!dp->ports) {
                err = -ENOMEM;
                goto err_destroy_percpu;
        }

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
                INIT_HLIST_HEAD(&dp->ports[i]);

        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
        parms.type = OVS_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);

        ovs_dp_change(dp, a);

        /* So far only local changes have been made, now need the lock. */
        ovs_lock();

        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                if (err == -EEXIST) {
                        /* An outdated user space instance that does not understand
                         * the concept of user_features has attempted to create a new
                         * datapath and is likely to reuse it. Drop all user features.
                         */
                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
                                ovs_dp_reset_user_features(skb, info);
                }

                goto err_destroy_ports_array;
        }

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);

        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, reply, info);
        return 0;

err_destroy_ports_array:
        ovs_unlock();
        kfree(dp->ports);
err_destroy_percpu:
        free_percpu(dp->stats_percpu);
err_destroy_table:
        ovs_flow_tbl_destroy(&dp->table, false);
err_free_dp:
        release_net(ovs_dp_get_net(dp));
        kfree(dp);
err_free_reply:
        kfree_skb(reply);
err:
        return err;
}

/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
        int i;

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
                struct vport *vport;
                struct hlist_node *n;

                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
                        if (vport->port_no != OVSP_LOCAL)
                                ovs_dp_detach_port(vport);
        }

        list_del_rcu(&dp->list_node);

        /* OVSP_LOCAL is the datapath's internal port.  All other ports in
         * the datapath must be destroyed before it is detached and the
         * datapath freed.
         */
        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

        /* RCU destroy the flow table */
        ovs_flow_tbl_destroy(&dp->table, true);

        call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto err_unlock_free;

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
        BUG_ON(err < 0);

        __dp_destroy(dp);
        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, reply, info);

        return 0;

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto err_unlock_free;

        ovs_dp_change(dp, info->attrs);

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);

        ovs_unlock();
        ovs_notify(&dp_datapath_genl_family, reply, info);

        return 0;

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info(info);
        if (!reply)
                return -ENOMEM;

        rcu_read_lock();
        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp)) {
                err = PTR_ERR(dp);
                goto err_unlock_free;
        }
        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);
        rcu_read_unlock();

        return genlmsg_reply(reply, info);

err_unlock_free:
        rcu_read_unlock();
        kfree_skb(reply);
        return err;
}

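/* Datapath dump: walks the per-namespace datapath list under RCU,
 * resuming from the index saved in cb->args[0]. */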
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
                if (i >= skip &&
                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         OVS_DP_CMD_NEW) < 0)
                        break;
                i++;
        }
        rcu_read_unlock();

        cb->args[0] = i;

        return skb->len;
}

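/* Netlink attribute policy for the OVS_DP_* commands below. */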
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
};

static struct genl_ops dp_datapath_genl_ops[] = {
        { .cmd = OVS_DP_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_new
        },
        { .cmd = OVS_DP_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_del
        },
        { .cmd = OVS_DP_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_get,
          .dumpit = ovs_dp_cmd_dump
        },
        { .cmd = OVS_DP_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_set,
        },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
        .maxattr = OVS_DP_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_datapath_genl_ops,
        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
        .mcgrps = &ovs_dp_datapath_multicast_group,
        .n_mcgrps = 1,
};

/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_vport_stats vport_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(vport->dp);

        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
            nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
                goto nla_put_failure;

        ovs_vport_get_stats(vport, &vport_stats);
        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
                    &vport_stats))
                goto nla_put_failure;

        err = ovs_vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
                                         u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
        BUG_ON(retval < 0);

        return skb;
}

/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
                                  struct ovs_header *ovs_header,
                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
        struct datapath *dp;
        struct vport *vport;

        if (a[OVS_VPORT_ATTR_NAME]) {
                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
                if (ovs_header->dp_ifindex &&
                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
                        return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);

                dp = get_dp(net, ovs_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);

                vport = ovs_vport_ovsl_rcu(dp, port_no);
                if (!vport)
                        return ERR_PTR(-ENODEV);
                return vport;
        } else
                return ERR_PTR(-EINVAL);
}

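/* OVS_VPORT_CMD_NEW: create a vport.  If the request names no port
 * number, scan upward from 1 for the first free slot; port 0
 * (OVSP_LOCAL) is reserved for the datapath's internal port.
 */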
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
            !a[OVS_VPORT_ATTR_UPCALL_PID])
                return -EINVAL;

        port_no = a[OVS_VPORT_ATTR_PORT_NO]
                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
        if (port_no >= DP_MAX_PORTS)
                return -EFBIG;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock_free;

        if (port_no) {
                vport = ovs_vport_ovsl(dp, port_no);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock_free;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock_free;
                        }
                        vport = ovs_vport_ovsl(dp, port_no);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;
        parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;

        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
        BUG_ON(err < 0);
        ovs_unlock();

        ovs_notify(&dp_vport_genl_family, reply, info);
        return 0;

exit_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

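/* OVS_VPORT_CMD_SET: modify an existing vport.  The port type is
 * immutable; only the options and the upcall portid may be changed.
 */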
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;

        if (a[OVS_VPORT_ATTR_TYPE] &&
            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
                err = -EINVAL;
                goto exit_unlock_free;
        }

        if (a[OVS_VPORT_ATTR_OPTIONS]) {
                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
                if (err)
                        goto exit_unlock_free;
        }

        if (a[OVS_VPORT_ATTR_UPCALL_PID])
                vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
        BUG_ON(err < 0);

        ovs_unlock();
        ovs_notify(&dp_vport_genl_family, reply, info);
        return 0;

exit_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

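/* OVS_VPORT_CMD_DEL: detach and destroy a vport.  The local port
 * (OVSP_LOCAL) cannot be deleted on its own; it only goes away with
 * the datapath itself.
 */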
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;

        if (vport->port_no == OVSP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock_free;
        }

        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
        BUG_ON(err < 0);
        ovs_dp_detach_port(vport);
        ovs_unlock();

        ovs_notify(&dp_vport_genl_family, reply, info);
        return 0;

exit_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

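/* OVS_VPORT_CMD_GET: look up one vport by name or port number under the
 * RCU read lock and reply with its current state.
 */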
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        reply = ovs_vport_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        rcu_read_lock();
        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock_free;
        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
        BUG_ON(err < 0);
        rcu_read_unlock();

        return genlmsg_reply(reply, info);

exit_unlock_free:
        rcu_read_unlock();
        kfree_skb(reply);
        return err;
}

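/* OVS_VPORT_CMD_GET (dump): iterate the datapath's vport hash table,
 * saving the bucket and in-bucket offset in cb->args[] between
 * callbacks so the dump can resume where it left off.
 */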
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        int bucket = cb->args[0], skip = cb->args[1];
        int i, j = 0;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }
        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
                struct vport *vport;

                j = 0;
                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
                        if (j >= skip &&
                            ovs_vport_cmd_fill_info(vport, skb,
                                                    NETLINK_CB(cb->skb).portid,
                                                    cb->nlh->nlmsg_seq,
                                                    NLM_F_MULTI,
                                                    OVS_VPORT_CMD_NEW) < 0)
                                goto out;

                        j++;
                }
                skip = 0;
        }
out:
        rcu_read_unlock();

        cb->args[0] = i;
        cb->args[1] = j;

        return skb->len;
}

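/* Netlink attribute policy for the OVS_VPORT_* commands below. */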
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = OVS_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_new
        },
        { .cmd = OVS_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_del
        },
        { .cmd = OVS_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_get,
          .dumpit = ovs_vport_cmd_dump
        },
        { .cmd = OVS_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_set,
        },
};

struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_VPORT_FAMILY,
        .version = OVS_VPORT_VERSION,
        .maxattr = OVS_VPORT_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_vport_genl_ops,
        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
        .mcgrps = &ovs_dp_vport_multicast_group,
        .n_mcgrps = 1,
};

static struct genl_family * const dp_genl_families[] = {
        &dp_datapath_genl_family,
        &dp_vport_genl_family,
        &dp_flow_genl_family,
        &dp_packet_genl_family,
};

static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i]);
}

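/* Register all Open vSwitch generic netlink families; on failure,
 * unregister the ones that were already registered.
 */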
static int dp_register_genl(void)
{
        int err;
        int i;

        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                err = genl_register_family(dp_genl_families[i]);
                if (err)
                        goto error;
        }

        return 0;

error:
        dp_unregister_genl(i);
        return err;
}

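/* Per-network-namespace state: each netns gets its own list of
 * datapaths and its own deferred notification work item.
 */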
static int __net_init ovs_init_net(struct net *net)
{
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

        INIT_LIST_HEAD(&ovs_net->dps);
        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
        return 0;
}

static void __net_exit ovs_exit_net(struct net *net)
{
        struct datapath *dp, *dp_next;
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

        ovs_lock();
        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
                __dp_destroy(dp);
        ovs_unlock();

        cancel_work_sync(&ovs_net->dp_notify_work);
}

static struct pernet_operations ovs_net_ops = {
        .init = ovs_init_net,
        .exit = ovs_exit_net,
        .id   = &ovs_net_id,
        .size = sizeof(struct ovs_net),
};

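/* Module init.  The BUILD_BUG_ON checks that the private ovs_skb_cb
 * scratch area fits inside skb->cb; each registration step below is
 * unwound in reverse order if a later one fails.
 */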
static int __init dp_init(void)
{
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

        pr_info("Open vSwitch switching datapath\n");

        err = ovs_flow_init();
        if (err)
                goto error;

        err = ovs_vport_init();
        if (err)
                goto error_flow_exit;

        err = register_pernet_device(&ovs_net_ops);
        if (err)
                goto error_vport_exit;

        err = register_netdevice_notifier(&ovs_dp_device_notifier);
        if (err)
                goto error_netns_exit;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_notifier;

        return 0;

error_unreg_notifier:
        unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
        unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
        ovs_vport_exit();
error_flow_exit:
        ovs_flow_exit();
error:
        return err;
}

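/* Module exit: tear down in the reverse order of dp_init().  The
 * rcu_barrier() waits for outstanding RCU callbacks (such as deferred
 * datapath destruction) to finish before the vport and flow subsystems
 * free their caches.
 */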
static void dp_cleanup(void)
{
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        unregister_netdevice_notifier(&ovs_dp_device_notifier);
        unregister_pernet_device(&ovs_net_ops);
        rcu_barrier();
        ovs_vport_exit();
        ovs_flow_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
