linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};
/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and to other state (flow table modifications, setting
 * miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
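
/* Illustrative sketch of the locking scheme above. This is a hypothetical
 * caller, not part of this file; it only uses helpers defined below
 * (get_dp_rcu(), ovs_lock(), ovs_unlock()).
 *
 *	rcu_read_lock();			// reader side
 *	dp = get_dp_rcu(net, dp_ifindex);
 *	if (dp)
 *		...read-only use of dp...
 *	rcu_read_unlock();
 *
 *	ovs_lock();				// writer side
 *	...add/remove vports, modify the flow table...
 *	ovs_unlock();
 */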

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
        struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);

                if (vport)
                        return vport->dp;
        }

        return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp;

        WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
        rcu_read_lock();
        dp = get_dp_rcu(net, dp_ifindex);
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

        return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
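        /* DP_VPORT_HASH_BUCKETS is a power of two, so masking with
         * (DP_VPORT_HASH_BUCKETS - 1) reduces port_no modulo the bucket
         * count without a divide. */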
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info,
                             uint32_t cutlen)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb = segs;
        do {
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen, int actions_attrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
                + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(actions_attrlen);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        skb_put_zero(skb, plen);
        }
}
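
/* Worked example for pad_packet() above (illustrative numbers only): with
 * skb->len == 61, NLA_ALIGN(61) == 64, so three zero bytes are appended.
 * Datapaths that set OVS_DP_F_UNALIGNED skip the padding and let user
 * space read the unpadded attribute directly. */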

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                err = skb_csum_hwoffload_help(skb, 0);
                if (err)
                        goto out;
        }

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru) {
                if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
                                upcall_info->mru)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0) {
                if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
                                skb->len)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy().
         */
        nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0);
        if (!nla) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};

static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

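                /* Snapshot this CPU's counters, retrying if a writer races
                 * with us. The u64_stats syncp is a seqcount on 32-bit
                 * kernels and compiles away on 64-bit kernels. */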
                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                              OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
                          sizeof(struct ovs_flow_stats), &stats,
                          OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &new_flow->key, false, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &new_flow->key, log);
        if (error)
                goto err_kfree_flow;

        /* unmasked key is needed to match when ufid is not used. */
        if (ovs_identifier_is_key(&new_flow->id))
                match.key = new_flow->id.unmasked_key;

        ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(struct net *net,
                                                const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because the mask is only
 * used to get the actions, a separate function saves some
 * stack space.
 *
 * If there are no key and action attrs, we return 0 directly;
 * in that case the caller will not use the match either. If an
 * actions attr is present, we try to get the actions and store
 * them in *acts. Before returning, we reset match->mask so that
 * we never hand back a match object with a dangling reference
 * to the on-stack mask.
 */
1107static int ovs_nla_init_match_and_action(struct net *net,
1108                                         struct sw_flow_match *match,
1109                                         struct sw_flow_key *key,
1110                                         struct nlattr **a,
1111                                         struct sw_flow_actions **acts,
1112                                         bool log)
1113{
1114        struct sw_flow_mask mask;
1115        int error = 0;
1116
1117        if (a[OVS_FLOW_ATTR_KEY]) {
1118                ovs_match_init(match, key, true, &mask);
1119                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1120                                          a[OVS_FLOW_ATTR_MASK], log);
1121                if (error)
1122                        goto error;
1123        }
1124
1125        if (a[OVS_FLOW_ATTR_ACTIONS]) {
1126                if (!a[OVS_FLOW_ATTR_KEY]) {
1127                        OVS_NLERR(log,
1128                                  "Flow key attribute not present in set flow.");
1129                        return -EINVAL;
1130                }
1131
1132                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1133                                         &mask, log);
1134                if (IS_ERR(*acts)) {
1135                        error = PTR_ERR(*acts);
1136                        goto error;
1137                }
1138        }
1139
1140        /* On success, error is 0. */
1141error:
1142        match->mask = NULL;
1143        return error;
1144}
1145
1146static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1147{
1148        struct net *net = sock_net(skb->sk);
1149        struct nlattr **a = info->attrs;
1150        struct ovs_header *ovs_header = info->userhdr;
1151        struct sw_flow_key key;
1152        struct sw_flow *flow;
1153        struct sk_buff *reply = NULL;
1154        struct datapath *dp;
1155        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1156        struct sw_flow_match match;
1157        struct sw_flow_id sfid;
1158        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1159        int error = 0;
1160        bool log = !a[OVS_FLOW_ATTR_PROBE];
1161        bool ufid_present;
1162
1163        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1164        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1165                OVS_NLERR(log,
1166                          "Flow set message rejected, Key attribute missing.");
1167                return -EINVAL;
1168        }
1169
1170        error = ovs_nla_init_match_and_action(net, &match, &key, a,
1171                                              &acts, log);
1172        if (error)
1173                goto error;
1174
1175        if (acts) {
1176                /* Can allocate before locking if have acts. */
1177                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1178                                                ufid_flags);
1179                if (IS_ERR(reply)) {
1180                        error = PTR_ERR(reply);
1181                        goto err_kfree_acts;
1182                }
1183        }
1184
1185        ovs_lock();
1186        dp = get_dp(net, ovs_header->dp_ifindex);
1187        if (unlikely(!dp)) {
1188                error = -ENODEV;
1189                goto err_unlock_ovs;
1190        }
1191        /* Check that the flow exists. */
1192        if (ufid_present)
1193                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1194        else
1195                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1196        if (unlikely(!flow)) {
1197                error = -ENOENT;
1198                goto err_unlock_ovs;
1199        }
1200
1201        /* Update actions, if present. */
1202        if (likely(acts)) {
1203                old_acts = ovsl_dereference(flow->sf_acts);
1204                rcu_assign_pointer(flow->sf_acts, acts);
1205
1206                if (unlikely(reply)) {
1207                        error = ovs_flow_cmd_fill_info(flow,
1208                                                       ovs_header->dp_ifindex,
1209                                                       reply, info->snd_portid,
1210                                                       info->snd_seq, 0,
1211                                                       OVS_FLOW_CMD_NEW,
1212                                                       ufid_flags);
1213                        BUG_ON(error < 0);
1214                }
1215        } else {
1216                /* Could not alloc without acts before locking. */
1217                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1218                                                info, OVS_FLOW_CMD_NEW, false,
1219                                                ufid_flags);
1220
1221                if (IS_ERR(reply)) {
1222                        error = PTR_ERR(reply);
1223                        goto err_unlock_ovs;
1224                }
1225        }
1226
1227        /* Clear stats. */
1228        if (a[OVS_FLOW_ATTR_CLEAR])
1229                ovs_flow_stats_clear(flow);
1230        ovs_unlock();
1231
1232        if (reply)
1233                ovs_notify(&dp_flow_genl_family, reply, info);
1234        if (old_acts)
1235                ovs_nla_free_flow_actions_rcu(old_acts);
1236
1237        return 0;
1238
1239err_unlock_ovs:
1240        ovs_unlock();
1241        kfree_skb(reply);
1242err_kfree_acts:
1243        ovs_nla_free_flow_actions(acts);
1244error:
1245        return error;
1246}
1247
1248static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1249{
1250        struct nlattr **a = info->attrs;
1251        struct ovs_header *ovs_header = info->userhdr;
1252        struct net *net = sock_net(skb->sk);
1253        struct sw_flow_key key;
1254        struct sk_buff *reply;
1255        struct sw_flow *flow;
1256        struct datapath *dp;
1257        struct sw_flow_match match;
1258        struct sw_flow_id ufid;
1259        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1260        int err = 0;
1261        bool log = !a[OVS_FLOW_ATTR_PROBE];
1262        bool ufid_present;
1263
1264        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1265        if (a[OVS_FLOW_ATTR_KEY]) {
1266                ovs_match_init(&match, &key, true, NULL);
1267                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1268                                        log);
1269        } else if (!ufid_present) {
1270                OVS_NLERR(log,
1271                          "Flow get message rejected, Key attribute missing.");
1272                err = -EINVAL;
1273        }
1274        if (err)
1275                return err;
1276
1277        ovs_lock();
1278        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1279        if (!dp) {
1280                err = -ENODEV;
1281                goto unlock;
1282        }
1283
1284        if (ufid_present)
1285                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1286        else
1287                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1288        if (!flow) {
1289                err = -ENOENT;
1290                goto unlock;
1291        }
1292
1293        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1294                                        OVS_FLOW_CMD_NEW, true, ufid_flags);
1295        if (IS_ERR(reply)) {
1296                err = PTR_ERR(reply);
1297                goto unlock;
1298        }
1299
1300        ovs_unlock();
1301        return genlmsg_reply(reply, info);
1302unlock:
1303        ovs_unlock();
1304        return err;
1305}
1306
1307static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1308{
1309        struct nlattr **a = info->attrs;
1310        struct ovs_header *ovs_header = info->userhdr;
1311        struct net *net = sock_net(skb->sk);
1312        struct sw_flow_key key;
1313        struct sk_buff *reply;
1314        struct sw_flow *flow = NULL;
1315        struct datapath *dp;
1316        struct sw_flow_match match;
1317        struct sw_flow_id ufid;
1318        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1319        int err;
1320        bool log = !a[OVS_FLOW_ATTR_PROBE];
1321        bool ufid_present;
1322
1323        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1324        if (a[OVS_FLOW_ATTR_KEY]) {
1325                ovs_match_init(&match, &key, true, NULL);
1326                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1327                                        NULL, log);
1328                if (unlikely(err))
1329                        return err;
1330        }
1331
1332        ovs_lock();
1333        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1334        if (unlikely(!dp)) {
1335                err = -ENODEV;
1336                goto unlock;
1337        }
1338
1339        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1340                err = ovs_flow_tbl_flush(&dp->table);
1341                goto unlock;
1342        }
1343
1344        if (ufid_present)
1345                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1346        else
1347                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1348        if (unlikely(!flow)) {
1349                err = -ENOENT;
1350                goto unlock;
1351        }
1352
1353        ovs_flow_tbl_remove(&dp->table, flow);
1354        ovs_unlock();
1355
1356        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1357                                        &flow->id, info, false, ufid_flags);
1358        if (likely(reply)) {
1359                if (likely(!IS_ERR(reply))) {
1360                        rcu_read_lock();        /*To keep RCU checker happy. */
1361                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1362                                                     reply, info->snd_portid,
1363                                                     info->snd_seq, 0,
1364                                                     OVS_FLOW_CMD_DEL,
1365                                                     ufid_flags);
1366                        rcu_read_unlock();
1367                        BUG_ON(err < 0);
1368
1369                        ovs_notify(&dp_flow_genl_family, reply, info);
1370                } else {
1371                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
1372                }
1373        }
1374
1375        ovs_flow_free(flow, true);
1376        return 0;
1377unlock:
1378        ovs_unlock();
1379        return err;
1380}
1381
1382static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1383{
1384        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1385        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1386        struct table_instance *ti;
1387        struct datapath *dp;
1388        u32 ufid_flags;
1389        int err;
1390
1391        err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
1392                            OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1393        if (err)
1394                return err;
1395        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1396
1397        rcu_read_lock();
1398        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1399        if (!dp) {
1400                rcu_read_unlock();
1401                return -ENODEV;
1402        }
1403
1404        ti = rcu_dereference(dp->table.ti);
1405        for (;;) {
1406                struct sw_flow *flow;
1407                u32 bucket, obj;
1408
1409                bucket = cb->args[0];
1410                obj = cb->args[1];
1411                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1412                if (!flow)
1413                        break;
1414
1415                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1416                                           NETLINK_CB(cb->skb).portid,
1417                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
1418                                           OVS_FLOW_CMD_NEW, ufid_flags) < 0)
1419                        break;
1420
1421                cb->args[0] = bucket;
1422                cb->args[1] = obj;
1423        }
1424        rcu_read_unlock();
1425        return skb->len;
1426}
1427
1428static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1429        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1430        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1431        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1432        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1433        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1434        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1435        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1436};
1437
1438static const struct genl_ops dp_flow_genl_ops[] = {
1439        { .cmd = OVS_FLOW_CMD_NEW,
1440          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1441          .policy = flow_policy,
1442          .doit = ovs_flow_cmd_new
1443        },
1444        { .cmd = OVS_FLOW_CMD_DEL,
1445          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1446          .policy = flow_policy,
1447          .doit = ovs_flow_cmd_del
1448        },
1449        { .cmd = OVS_FLOW_CMD_GET,
1450          .flags = 0,               /* OK for unprivileged users. */
1451          .policy = flow_policy,
1452          .doit = ovs_flow_cmd_get,
1453          .dumpit = ovs_flow_cmd_dump
1454        },
1455        { .cmd = OVS_FLOW_CMD_SET,
1456          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1457          .policy = flow_policy,
1458          .doit = ovs_flow_cmd_set,
1459        },
1460};
1461
1462static struct genl_family dp_flow_genl_family __ro_after_init = {
1463        .hdrsize = sizeof(struct ovs_header),
1464        .name = OVS_FLOW_FAMILY,
1465        .version = OVS_FLOW_VERSION,
1466        .maxattr = OVS_FLOW_ATTR_MAX,
1467        .netnsok = true,
1468        .parallel_ops = true,
1469        .ops = dp_flow_genl_ops,
1470        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1471        .mcgrps = &ovs_dp_flow_multicast_group,
1472        .n_mcgrps = 1,
1473        .module = THIS_MODULE,
1474};
1475
1476static size_t ovs_dp_cmd_msg_size(void)
1477{
1478        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1479
1480        msgsize += nla_total_size(IFNAMSIZ);
1481        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1482        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1483        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1484
1485        return msgsize;
1486}
1487
1488/* Called with ovs_mutex. */
1489static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1490                                u32 portid, u32 seq, u32 flags, u8 cmd)
1491{
1492        struct ovs_header *ovs_header;
1493        struct ovs_dp_stats dp_stats;
1494        struct ovs_dp_megaflow_stats dp_megaflow_stats;
1495        int err;
1496
1497        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1498                                   flags, cmd);
1499        if (!ovs_header)
1500                goto error;
1501
1502        ovs_header->dp_ifindex = get_dpifindex(dp);
1503
1504        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1505        if (err)
1506                goto nla_put_failure;
1507
1508        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1509        if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1510                          &dp_stats, OVS_DP_ATTR_PAD))
1511                goto nla_put_failure;
1512
1513        if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1514                          sizeof(struct ovs_dp_megaflow_stats),
1515                          &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1516                goto nla_put_failure;
1517
1518        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1519                goto nla_put_failure;
1520
1521        genlmsg_end(skb, ovs_header);
1522        return 0;
1523
1524nla_put_failure:
1525        genlmsg_cancel(skb, ovs_header);
1526error:
1527        return -EMSGSIZE;
1528}
1529
1530static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1531{
1532        return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1533}
1534
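/* Resolve the datapath a request refers to: by OVS_DP_ATTR_NAME if the
 * attribute is present (matched against the local internal vport, which
 * carries the datapath's name), otherwise by dp_ifindex from the fixed
 * ovs_header.  Returns ERR_PTR(-ENODEV) if nothing matches.
 */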
1535/* Called with rcu_read_lock or ovs_mutex. */
1536static struct datapath *lookup_datapath(struct net *net,
1537                                        const struct ovs_header *ovs_header,
1538                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1539{
1540        struct datapath *dp;
1541
1542        if (!a[OVS_DP_ATTR_NAME]) {
1543                dp = get_dp(net, ovs_header->dp_ifindex);
1544        } else {
1545                struct vport *vport;
1546
1547                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1548                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1549        }
1550        return dp ? dp : ERR_PTR(-ENODEV);
1551}
1552
1553static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1554{
1555        struct datapath *dp;
1556
1557        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1558        if (IS_ERR(dp))
1559                return;
1560
1561        WARN(dp->user_features, "Dropping previously announced user features\n");
1562        dp->user_features = 0;
1563}
1564
1565static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1566{
1567        if (a[OVS_DP_ATTR_USER_FEATURES])
1568                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1569}
1570
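/* Create a new datapath together with its local internal vport.
 * OVS_DP_ATTR_NAME and OVS_DP_ATTR_UPCALL_PID are mandatory.  For
 * illustration only, a minimal userspace request might be built with
 * libnl-3 roughly as follows (hypothetical sketch; error handling and
 * reply parsing omitted, and dp_ifindex is not used by CMD_NEW):
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	genl_connect(sk);
 *	int fam = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 *	struct nl_msg *msg = nlmsg_alloc();
 *	struct ovs_header *oh =
 *		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, fam,
 *			    sizeof(*oh), 0, OVS_DP_CMD_NEW,
 *			    OVS_DATAPATH_VERSION);
 *	oh->dp_ifindex = 0;
 *	nla_put_string(msg, OVS_DP_ATTR_NAME, "dp0");
 *	nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID,
 *		    nl_socket_get_local_port(sk));
 *	nl_send_auto(sk, msg);
 */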
1571static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1572{
1573        struct nlattr **a = info->attrs;
1574        struct vport_parms parms;
1575        struct sk_buff *reply;
1576        struct datapath *dp;
1577        struct vport *vport;
1578        struct ovs_net *ovs_net;
1579        int err, i;
1580
1581        err = -EINVAL;
1582        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1583                goto err;
1584
1585        reply = ovs_dp_cmd_alloc_info();
1586        if (!reply)
1587                return -ENOMEM;
1588
1589        err = -ENOMEM;
1590        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1591        if (!dp)
1592                goto err_free_reply;
1593
1594        ovs_dp_set_net(dp, sock_net(skb->sk));
1595
1596        /* Allocate table. */
1597        err = ovs_flow_tbl_init(&dp->table);
1598        if (err)
1599                goto err_free_dp;
1600
1601        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1602        if (!dp->stats_percpu) {
1603                err = -ENOMEM;
1604                goto err_destroy_table;
1605        }
1606
1607        dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1608                                  sizeof(struct hlist_head), GFP_KERNEL);
1609        if (!dp->ports) {
1610                err = -ENOMEM;
1611                goto err_destroy_percpu;
1612        }
1613
1614        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1615                INIT_HLIST_HEAD(&dp->ports[i]);
1616
1617        /* Set up our datapath device. */
1618        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1619        parms.type = OVS_VPORT_TYPE_INTERNAL;
1620        parms.options = NULL;
1621        parms.dp = dp;
1622        parms.port_no = OVSP_LOCAL;
1623        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1624
1625        ovs_dp_change(dp, a);
1626
1627        /* So far only local changes have been made, now need the lock. */
1628        ovs_lock();
1629
1630        vport = new_vport(&parms);
1631        if (IS_ERR(vport)) {
1632                err = PTR_ERR(vport);
1633                if (err == -EBUSY)
1634                        err = -EEXIST;
1635
1636                if (err == -EEXIST) {
1637                        /* An outdated user space instance that does not understand
1638                         * the concept of user_features has attempted to create a new
1639                         * datapath and is likely to reuse it. Drop all user features.
1640                         */
1641                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1642                                ovs_dp_reset_user_features(skb, info);
1643                }
1644
1645                goto err_destroy_ports_array;
1646        }
1647
1648        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1649                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1650        BUG_ON(err < 0);
1651
1652        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1653        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1654
1655        ovs_unlock();
1656
1657        ovs_notify(&dp_datapath_genl_family, reply, info);
1658        return 0;
1659
1660err_destroy_ports_array:
1661        ovs_unlock();
1662        kfree(dp->ports);
1663err_destroy_percpu:
1664        free_percpu(dp->stats_percpu);
1665err_destroy_table:
1666        ovs_flow_tbl_destroy(&dp->table);
1667err_free_dp:
1668        kfree(dp);
1669err_free_reply:
1670        kfree_skb(reply);
1671err:
1672        return err;
1673}
1674
1675/* Called with ovs_mutex. */
1676static void __dp_destroy(struct datapath *dp)
1677{
1678        int i;
1679
1680        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1681                struct vport *vport;
1682                struct hlist_node *n;
1683
1684                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1685                        if (vport->port_no != OVSP_LOCAL)
1686                                ovs_dp_detach_port(vport);
1687        }
1688
1689        list_del_rcu(&dp->list_node);
1690
1691        /* OVSP_LOCAL is the datapath's internal port. Make sure all other
1692         * ports in the datapath are destroyed before detaching it and
1693         * freeing the datapath. */
1694        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1695
1696        /* RCU destroy the flow table */
1697        call_rcu(&dp->rcu, destroy_dp_rcu);
1698}
1699
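/* Destroy a datapath.  The reply is allocated up front and filled while
 * the datapath still exists, so no failure is possible once
 * __dp_destroy() has run and the notification always reflects the
 * completed state change.
 */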
1700static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1701{
1702        struct sk_buff *reply;
1703        struct datapath *dp;
1704        int err;
1705
1706        reply = ovs_dp_cmd_alloc_info();
1707        if (!reply)
1708                return -ENOMEM;
1709
1710        ovs_lock();
1711        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1712        err = PTR_ERR(dp);
1713        if (IS_ERR(dp))
1714                goto err_unlock_free;
1715
1716        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1717                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
1718        BUG_ON(err < 0);
1719
1720        __dp_destroy(dp);
1721        ovs_unlock();
1722
1723        ovs_notify(&dp_datapath_genl_family, reply, info);
1724
1725        return 0;
1726
1727err_unlock_free:
1728        ovs_unlock();
1729        kfree_skb(reply);
1730        return err;
1731}
1732
1733static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1734{
1735        struct sk_buff *reply;
1736        struct datapath *dp;
1737        int err;
1738
1739        reply = ovs_dp_cmd_alloc_info();
1740        if (!reply)
1741                return -ENOMEM;
1742
1743        ovs_lock();
1744        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1745        err = PTR_ERR(dp);
1746        if (IS_ERR(dp))
1747                goto err_unlock_free;
1748
1749        ovs_dp_change(dp, info->attrs);
1750
1751        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1752                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1753        BUG_ON(err < 0);
1754
1755        ovs_unlock();
1756        ovs_notify(&dp_datapath_genl_family, reply, info);
1757
1758        return 0;
1759
1760err_unlock_free:
1761        ovs_unlock();
1762        kfree_skb(reply);
1763        return err;
1764}
1765
1766static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1767{
1768        struct sk_buff *reply;
1769        struct datapath *dp;
1770        int err;
1771
1772        reply = ovs_dp_cmd_alloc_info();
1773        if (!reply)
1774                return -ENOMEM;
1775
1776        ovs_lock();
1777        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1778        if (IS_ERR(dp)) {
1779                err = PTR_ERR(dp);
1780                goto err_unlock_free;
1781        }
1782        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1783                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1784        BUG_ON(err < 0);
1785        ovs_unlock();
1786
1787        return genlmsg_reply(reply, info);
1788
1789err_unlock_free:
1790        ovs_unlock();
1791        kfree_skb(reply);
1792        return err;
1793}
1794
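/* Dump all datapaths in the caller's net namespace.  cb->args[0] counts
 * the entries already emitted, so a resumed dump skips what the
 * previous pass delivered; ovs_mutex keeps the dps list stable during
 * the walk.
 */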
1795static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1796{
1797        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1798        struct datapath *dp;
1799        int skip = cb->args[0];
1800        int i = 0;
1801
1802        ovs_lock();
1803        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1804                if (i >= skip &&
1805                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1806                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1807                                         OVS_DP_CMD_NEW) < 0)
1808                        break;
1809                i++;
1810        }
1811        ovs_unlock();
1812
1813        cb->args[0] = i;
1814
1815        return skb->len;
1816}
1817
1818static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1819        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1820        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1821        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1822};
1823
1824static const struct genl_ops dp_datapath_genl_ops[] = {
1825        { .cmd = OVS_DP_CMD_NEW,
1826          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1827          .policy = datapath_policy,
1828          .doit = ovs_dp_cmd_new
1829        },
1830        { .cmd = OVS_DP_CMD_DEL,
1831          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1832          .policy = datapath_policy,
1833          .doit = ovs_dp_cmd_del
1834        },
1835        { .cmd = OVS_DP_CMD_GET,
1836          .flags = 0,               /* OK for unprivileged users. */
1837          .policy = datapath_policy,
1838          .doit = ovs_dp_cmd_get,
1839          .dumpit = ovs_dp_cmd_dump
1840        },
1841        { .cmd = OVS_DP_CMD_SET,
1842          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1843          .policy = datapath_policy,
1844          .doit = ovs_dp_cmd_set,
1845        },
1846};
1847
1848static struct genl_family dp_datapath_genl_family __ro_after_init = {
1849        .hdrsize = sizeof(struct ovs_header),
1850        .name = OVS_DATAPATH_FAMILY,
1851        .version = OVS_DATAPATH_VERSION,
1852        .maxattr = OVS_DP_ATTR_MAX,
1853        .netnsok = true,
1854        .parallel_ops = true,
1855        .ops = dp_datapath_genl_ops,
1856        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1857        .mcgrps = &ovs_dp_datapath_multicast_group,
1858        .n_mcgrps = 1,
1859        .module = THIS_MODULE,
1860};
1861
1862/* Called with ovs_mutex or RCU read lock. */
1863static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1864                                   u32 portid, u32 seq, u32 flags, u8 cmd)
1865{
1866        struct ovs_header *ovs_header;
1867        struct ovs_vport_stats vport_stats;
1868        int err;
1869
1870        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1871                                 flags, cmd);
1872        if (!ovs_header)
1873                return -EMSGSIZE;
1874
1875        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1876
1877        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1878            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1879            nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1880                           ovs_vport_name(vport)))
1881                goto nla_put_failure;
1882
1883        ovs_vport_get_stats(vport, &vport_stats);
1884        if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1885                          sizeof(struct ovs_vport_stats), &vport_stats,
1886                          OVS_VPORT_ATTR_PAD))
1887                goto nla_put_failure;
1888
1889        if (ovs_vport_get_upcall_portids(vport, skb))
1890                goto nla_put_failure;
1891
1892        err = ovs_vport_get_options(vport, skb);
1893        if (err == -EMSGSIZE)
1894                goto error;
1895
1896        genlmsg_end(skb, ovs_header);
1897        return 0;
1898
1899nla_put_failure:
1900        err = -EMSGSIZE;
1901error:
1902        genlmsg_cancel(skb, ovs_header);
1903        return err;
1904}
1905
1906static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1907{
1908        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1909}
1910
1911/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1912struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1913                                         u32 seq, u8 cmd)
1914{
1915        struct sk_buff *skb;
1916        int retval;
1917
1918        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1919        if (!skb)
1920                return ERR_PTR(-ENOMEM);
1921
1922        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1923        BUG_ON(retval < 0);
1924
1925        return skb;
1926}
1927
1928/* Called with ovs_mutex or RCU read lock. */
1929static struct vport *lookup_vport(struct net *net,
1930                                  const struct ovs_header *ovs_header,
1931                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1932{
1933        struct datapath *dp;
1934        struct vport *vport;
1935
1936        if (a[OVS_VPORT_ATTR_NAME]) {
1937                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1938                if (!vport)
1939                        return ERR_PTR(-ENODEV);
1940                if (ovs_header->dp_ifindex &&
1941                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1942                        return ERR_PTR(-ENODEV);
1943                return vport;
1944        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1945                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1946
1947                if (port_no >= DP_MAX_PORTS)
1948                        return ERR_PTR(-EFBIG);
1949
1950                dp = get_dp(net, ovs_header->dp_ifindex);
1951                if (!dp)
1952                        return ERR_PTR(-ENODEV);
1953
1954                vport = ovs_vport_ovsl_rcu(dp, port_no);
1955                if (!vport)
1956                        return ERR_PTR(-ENODEV);
1957                return vport;
1958        } else
1959                return ERR_PTR(-EINVAL);
1960}
1961
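/* Recompute dp->max_headroom as the maximum forwarding headroom over
 * all vports and propagate it to every underlying device, so that skbs
 * received on one port normally carry enough headroom to be forwarded
 * out of any other port without reallocation.
 */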
1962/* Called with ovs_mutex */
1963static void update_headroom(struct datapath *dp)
1964{
1965        unsigned int dev_headroom, max_headroom = 0;
1966        struct net_device *dev;
1967        struct vport *vport;
1968        int i;
1969
1970        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1971                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1972                        dev = vport->dev;
1973                        dev_headroom = netdev_get_fwd_headroom(dev);
1974                        if (dev_headroom > max_headroom)
1975                                max_headroom = dev_headroom;
1976                }
1977        }
1978
1979        dp->max_headroom = max_headroom;
1980        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1981                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1982                        netdev_set_rx_headroom(vport->dev, max_headroom);
1983}
1984
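/* Add a vport to an existing datapath.  When userspace does not choose
 * a port number, the first free slot starting at 1 is used (port 0 is
 * OVSP_LOCAL).  A -EAGAIN from new_vport() means the vport type's
 * module has just been loaded on demand, so the whole attach sequence
 * is restarted.
 */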
1985static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1986{
1987        struct nlattr **a = info->attrs;
1988        struct ovs_header *ovs_header = info->userhdr;
1989        struct vport_parms parms;
1990        struct sk_buff *reply;
1991        struct vport *vport;
1992        struct datapath *dp;
1993        u32 port_no;
1994        int err;
1995
1996        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1997            !a[OVS_VPORT_ATTR_UPCALL_PID])
1998                return -EINVAL;
1999
2000        port_no = a[OVS_VPORT_ATTR_PORT_NO]
2001                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2002        if (port_no >= DP_MAX_PORTS)
2003                return -EFBIG;
2004
2005        reply = ovs_vport_cmd_alloc_info();
2006        if (!reply)
2007                return -ENOMEM;
2008
2009        ovs_lock();
2010restart:
2011        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2012        err = -ENODEV;
2013        if (!dp)
2014                goto exit_unlock_free;
2015
2016        if (port_no) {
2017                vport = ovs_vport_ovsl(dp, port_no);
2018                err = -EBUSY;
2019                if (vport)
2020                        goto exit_unlock_free;
2021        } else {
2022                for (port_no = 1; ; port_no++) {
2023                        if (port_no >= DP_MAX_PORTS) {
2024                                err = -EFBIG;
2025                                goto exit_unlock_free;
2026                        }
2027                        vport = ovs_vport_ovsl(dp, port_no);
2028                        if (!vport)
2029                                break;
2030                }
2031        }
2032
2033        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2034        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2035        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2036        parms.dp = dp;
2037        parms.port_no = port_no;
2038        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2039
2040        vport = new_vport(&parms);
2041        err = PTR_ERR(vport);
2042        if (IS_ERR(vport)) {
2043                if (err == -EAGAIN)
2044                        goto restart;
2045                goto exit_unlock_free;
2046        }
2047
2048        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2049                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2050
2051        if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2052                update_headroom(dp);
2053        else
2054                netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2055
2056        BUG_ON(err < 0);
2057        ovs_unlock();
2058
2059        ovs_notify(&dp_vport_genl_family, reply, info);
2060        return 0;
2061
2062exit_unlock_free:
2063        ovs_unlock();
2064        kfree_skb(reply);
2065        return err;
2066}
2067
2068static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2069{
2070        struct nlattr **a = info->attrs;
2071        struct sk_buff *reply;
2072        struct vport *vport;
2073        int err;
2074
2075        reply = ovs_vport_cmd_alloc_info();
2076        if (!reply)
2077                return -ENOMEM;
2078
2079        ovs_lock();
2080        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2081        err = PTR_ERR(vport);
2082        if (IS_ERR(vport))
2083                goto exit_unlock_free;
2084
2085        if (a[OVS_VPORT_ATTR_TYPE] &&
2086            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2087                err = -EINVAL;
2088                goto exit_unlock_free;
2089        }
2090
2091        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2092                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2093                if (err)
2094                        goto exit_unlock_free;
2095        }
2096
2098        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2099                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2100
2101                err = ovs_vport_set_upcall_portids(vport, ids);
2102                if (err)
2103                        goto exit_unlock_free;
2104        }
2105
2106        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2107                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2108        BUG_ON(err < 0);
2109
2110        ovs_unlock();
2111        ovs_notify(&dp_vport_genl_family, reply, info);
2112        return 0;
2113
2114exit_unlock_free:
2115        ovs_unlock();
2116        kfree_skb(reply);
2117        return err;
2118}
2119
2120static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2121{
2122        bool must_update_headroom = false;
2123        struct nlattr **a = info->attrs;
2124        struct sk_buff *reply;
2125        struct datapath *dp;
2126        struct vport *vport;
2127        int err;
2128
2129        reply = ovs_vport_cmd_alloc_info();
2130        if (!reply)
2131                return -ENOMEM;
2132
2133        ovs_lock();
2134        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2135        err = PTR_ERR(vport);
2136        if (IS_ERR(vport))
2137                goto exit_unlock_free;
2138
2139        if (vport->port_no == OVSP_LOCAL) {
2140                err = -EINVAL;
2141                goto exit_unlock_free;
2142        }
2143
2144        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2145                                      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2146        BUG_ON(err < 0);
2147
2148        /* Deleting the vport may trigger a datapath headroom update. */
2149        dp = vport->dp;
2150        if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2151                must_update_headroom = true;
2152        netdev_reset_rx_headroom(vport->dev);
2153        ovs_dp_detach_port(vport);
2154
2155        if (must_update_headroom)
2156                update_headroom(dp);
2157        ovs_unlock();
2158
2159        ovs_notify(&dp_vport_genl_family, reply, info);
2160        return 0;
2161
2162exit_unlock_free:
2163        ovs_unlock();
2164        kfree_skb(reply);
2165        return err;
2166}
2167
2168static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2169{
2170        struct nlattr **a = info->attrs;
2171        struct ovs_header *ovs_header = info->userhdr;
2172        struct sk_buff *reply;
2173        struct vport *vport;
2174        int err;
2175
2176        reply = ovs_vport_cmd_alloc_info();
2177        if (!reply)
2178                return -ENOMEM;
2179
2180        rcu_read_lock();
2181        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2182        err = PTR_ERR(vport);
2183        if (IS_ERR(vport))
2184                goto exit_unlock_free;
2185        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2186                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2187        BUG_ON(err < 0);
2188        rcu_read_unlock();
2189
2190        return genlmsg_reply(reply, info);
2191
2192exit_unlock_free:
2193        rcu_read_unlock();
2194        kfree_skb(reply);
2195        return err;
2196}
2197
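/* Dump all vports of one datapath.  The resume cursor is two-level,
 * mirroring the flow dump above: cb->args[0] is the hash bucket to
 * continue from and cb->args[1] the index within that bucket.
 */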
2198static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2199{
2200        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2201        struct datapath *dp;
2202        int bucket = cb->args[0], skip = cb->args[1];
2203        int i, j = 0;
2204
2205        rcu_read_lock();
2206        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2207        if (!dp) {
2208                rcu_read_unlock();
2209                return -ENODEV;
2210        }
2211        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2212                struct vport *vport;
2213
2214                j = 0;
2215                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2216                        if (j >= skip &&
2217                            ovs_vport_cmd_fill_info(vport, skb,
2218                                                    NETLINK_CB(cb->skb).portid,
2219                                                    cb->nlh->nlmsg_seq,
2220                                                    NLM_F_MULTI,
2221                                                    OVS_VPORT_CMD_NEW) < 0)
2222                                goto out;
2223
2224                        j++;
2225                }
2226                skip = 0;
2227        }
2228out:
2229        rcu_read_unlock();
2230
2231        cb->args[0] = i;
2232        cb->args[1] = j;
2233
2234        return skb->len;
2235}
2236
2237static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2238        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2239        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2240        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2241        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2242        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2243        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2244};
2245
2246static const struct genl_ops dp_vport_genl_ops[] = {
2247        { .cmd = OVS_VPORT_CMD_NEW,
2248          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2249          .policy = vport_policy,
2250          .doit = ovs_vport_cmd_new
2251        },
2252        { .cmd = OVS_VPORT_CMD_DEL,
2253          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2254          .policy = vport_policy,
2255          .doit = ovs_vport_cmd_del
2256        },
2257        { .cmd = OVS_VPORT_CMD_GET,
2258          .flags = 0,               /* OK for unprivileged users. */
2259          .policy = vport_policy,
2260          .doit = ovs_vport_cmd_get,
2261          .dumpit = ovs_vport_cmd_dump
2262        },
2263        { .cmd = OVS_VPORT_CMD_SET,
2264          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2265          .policy = vport_policy,
2266          .doit = ovs_vport_cmd_set,
2267        },
2268};
2269
2270struct genl_family dp_vport_genl_family __ro_after_init = {
2271        .hdrsize = sizeof(struct ovs_header),
2272        .name = OVS_VPORT_FAMILY,
2273        .version = OVS_VPORT_VERSION,
2274        .maxattr = OVS_VPORT_ATTR_MAX,
2275        .netnsok = true,
2276        .parallel_ops = true,
2277        .ops = dp_vport_genl_ops,
2278        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2279        .mcgrps = &ovs_dp_vport_multicast_group,
2280        .n_mcgrps = 1,
2281        .module = THIS_MODULE,
2282};
2283
2284static struct genl_family * const dp_genl_families[] = {
2285        &dp_datapath_genl_family,
2286        &dp_vport_genl_family,
2287        &dp_flow_genl_family,
2288        &dp_packet_genl_family,
2289};
2290
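/* Unregister the first n_families entries of dp_genl_families.  Taking
 * a count lets dp_register_genl() roll back exactly the families it
 * managed to register before hitting an error.
 */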
2291static void dp_unregister_genl(int n_families)
2292{
2293        int i;
2294
2295        for (i = 0; i < n_families; i++)
2296                genl_unregister_family(dp_genl_families[i]);
2297}
2298
2299static int __init dp_register_genl(void)
2300{
2301        int err;
2302        int i;
2303
2304        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2305
2306                err = genl_register_family(dp_genl_families[i]);
2307                if (err)
2308                        goto error;
2309        }
2310
2311        return 0;
2312
2313error:
2314        dp_unregister_genl(i);
2315        return err;
2316}
2317
2318static int __net_init ovs_init_net(struct net *net)
2319{
2320        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2321
2322        INIT_LIST_HEAD(&ovs_net->dps);
2323        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2324        ovs_ct_init(net);
2325        return 0;
2326}
2327
2328static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2329                                            struct list_head *head)
2330{
2331        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2332        struct datapath *dp;
2333
2334        list_for_each_entry(dp, &ovs_net->dps, list_node) {
2335                int i;
2336
2337                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2338                        struct vport *vport;
2339
2340                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2341                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2342                                        continue;
2343
2344                                if (dev_net(vport->dev) == dnet)
2345                                        list_add(&vport->detach_list, head);
2346                        }
2347                }
2348        }
2349}
2350
2351static void __net_exit ovs_exit_net(struct net *dnet)
2352{
2353        struct datapath *dp, *dp_next;
2354        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2355        struct vport *vport, *vport_next;
2356        struct net *net;
2357        LIST_HEAD(head);
2358
2359        ovs_ct_exit(dnet);
2360        ovs_lock();
2361        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2362                __dp_destroy(dp);
2363
2364        rtnl_lock();
2365        for_each_net(net)
2366                list_vports_from_net(net, dnet, &head);
2367        rtnl_unlock();
2368
2369        /* Detach all internal vports whose device lives in the exiting net. */
2370        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2371                list_del(&vport->detach_list);
2372                ovs_dp_detach_port(vport);
2373        }
2374
2375        ovs_unlock();
2376
2377        cancel_work_sync(&ovs_net->dp_notify_work);
2378}
2379
2380static struct pernet_operations ovs_net_ops = {
2381        .init = ovs_init_net,
2382        .exit = ovs_exit_net,
2383        .id   = &ovs_net_id,
2384        .size = sizeof(struct ovs_net),
2385};
2386
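/* Module init: bring the subsystems up in dependency order.  The error
 * ladder below tears down, in reverse order, only what was already
 * initialized.
 */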
2387static int __init dp_init(void)
2388{
2389        int err;
2390
2391        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2392
2393        pr_info("Open vSwitch switching datapath\n");
2394
2395        err = action_fifos_init();
2396        if (err)
2397                goto error;
2398
2399        err = ovs_internal_dev_rtnl_link_register();
2400        if (err)
2401                goto error_action_fifos_exit;
2402
2403        err = ovs_flow_init();
2404        if (err)
2405                goto error_unreg_rtnl_link;
2406
2407        err = ovs_vport_init();
2408        if (err)
2409                goto error_flow_exit;
2410
2411        err = register_pernet_device(&ovs_net_ops);
2412        if (err)
2413                goto error_vport_exit;
2414
2415        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2416        if (err)
2417                goto error_netns_exit;
2418
2419        err = ovs_netdev_init();
2420        if (err)
2421                goto error_unreg_notifier;
2422
2423        err = dp_register_genl();
2424        if (err < 0)
2425                goto error_unreg_netdev;
2426
2427        return 0;
2428
2429error_unreg_netdev:
2430        ovs_netdev_exit();
2431error_unreg_notifier:
2432        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2433error_netns_exit:
2434        unregister_pernet_device(&ovs_net_ops);
2435error_vport_exit:
2436        ovs_vport_exit();
2437error_flow_exit:
2438        ovs_flow_exit();
2439error_unreg_rtnl_link:
2440        ovs_internal_dev_rtnl_link_unregister();
2441error_action_fifos_exit:
2442        action_fifos_exit();
2443error:
2444        return err;
2445}
2446
2447static void dp_cleanup(void)
2448{
2449        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2450        ovs_netdev_exit();
2451        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2452        unregister_pernet_device(&ovs_net_ops);
2453        rcu_barrier();
2454        ovs_vport_exit();
2455        ovs_flow_exit();
2456        ovs_internal_dev_rtnl_link_unregister();
2457        action_fifos_exit();
2458}
2459
2460module_init(dp_init);
2461module_exit(dp_cleanup);
2462
2463MODULE_DESCRIPTION("Open vSwitch switching datapath");
2464MODULE_LICENSE("GPL");
2465MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2466MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2467MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2468MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2469