/* linux/net/openvswitch/datapath.c */

/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};

/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath or port,
 * set operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are mostly protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

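/* Usage sketch (editor's illustration; modify_datapath() is a hypothetical
 * helper, not part of this file): control-path callers bracket datapath
 * modifications with ovs_lock()/ovs_unlock(), while readers rely on
 * rcu_read_lock() and the _rcu lookup helpers alone:
 *
 *      ovs_lock();
 *      dp = get_dp(net, dp_ifindex);
 *      if (dp)
 *              err = modify_datapath(dp);
 *      ovs_unlock();
 */
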
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
        struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        return vport->dp;
        }

        return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp;

        WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
        rcu_read_lock();
        dp = get_dp_rcu(net, dp_ifindex);
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        kfree(dp);
}

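/* Note: the bucket mask below requires DP_VPORT_HASH_BUCKETS to be a
 * power of two; ports are distributed by the low-order bits of port_no.
 */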
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

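/* Receive fast path: look up the flow for 'skb' in the flow table and
 * execute its actions; on a miss, hand the packet to userspace as an
 * OVS_PACKET_CMD_MISS upcall and account it in n_missed.
 */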
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info,
                             uint32_t cutlen)
{
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen, int actions_attrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
                + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(actions_attrlen);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

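/* Netlink attributes are aligned to NLA_ALIGNTO (4) bytes; e.g. a message
 * of 13 bytes is padded to NLA_ALIGN(13) == 16, so pad_packet() below
 * appends three zero bytes unless the datapath advertises
 * OVS_DP_F_UNALIGNED.
 */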
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        skb_put_zero(skb, plen);
        }
}

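/* Build an upcall message and unicast it to 'upcall_info->portid'.
 * Attributes are emitted in this order: OVS_PACKET_ATTR_KEY, then the
 * optional USERDATA, EGRESS_TUN_KEY, ACTIONS, MRU and LEN attributes, and
 * finally the packet payload in OVS_PACKET_ATTR_PACKET (zero-copied when
 * the peer accepts unaligned data).
 */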
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_csum_hwoffload_help(skb, 0)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru) {
                if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
                                upcall_info->mru)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0) {
                if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
                                skb->len)) {
                        err = -ENOBUFS;
                        goto out;
                }
                pad_packet(dp, user_skb);
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

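/* OVS_PACKET_CMD_EXECUTE requires OVS_PACKET_ATTR_PACKET (at least a full
 * Ethernet header), OVS_PACKET_ATTR_KEY and OVS_PACKET_ATTR_ACTIONS;
 * OVS_PACKET_ATTR_PROBE and OVS_PACKET_ATTR_MRU are optional.
 */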
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};

static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .module = THIS_MODULE,
};

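/* Fold the per-CPU counters into 'stats' and 'mega_stats'. Each CPU's
 * counters are sampled under its u64_stats seqcount and retried on a
 * concurrent update, so the sums are consistent without a global lock.
 */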
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                              OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
                          sizeof(struct ovs_flow_stats), &stats,
                          OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &new_flow->key, false, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &new_flow->key, log);
        if (error)
                goto err_kfree_flow;

        /* unmasked key is needed to match when ufid is not used. */
        if (ovs_identifier_is_key(&new_flow->id))
                match.key = new_flow->id.unmasked_key;

        ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(struct net *net,
                                                const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because the mask is only
 * used to get the actions, we introduce a separate function to
 * save stack space.
 *
 * If neither the key nor the action attribute is present, we
 * return 0 directly; in that case the caller will not use the
 * match either. If the action attribute is present, we try to
 * extract the actions and save them in *acts. Before returning,
 * we reset the match->mask pointer, because we must not return
 * a match object with a dangling reference to the on-stack mask.
 */
static int ovs_nla_init_match_and_action(struct net *net,
                                         struct sw_flow_match *match,
                                         struct sw_flow_key *key,
                                         struct nlattr **a,
                                         struct sw_flow_actions **acts,
                                         bool log)
{
        struct sw_flow_mask mask;
        int error = 0;

        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(match, key, true, &mask);
                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
                                          a[OVS_FLOW_ATTR_MASK], log);
                if (error)
                        goto error;
        }

        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                if (!a[OVS_FLOW_ATTR_KEY]) {
                        OVS_NLERR(log,
                                  "Flow key attribute not present in set flow.");
                        error = -EINVAL;
                        goto error;
                }

                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
                                         &mask, log);
                if (IS_ERR(*acts)) {
                        error = PTR_ERR(*acts);
                        goto error;
                }
        }

        /* On success, error is 0. */
error:
        match->mask = NULL;
        return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
                OVS_NLERR(log,
                          "Flow set message rejected, Key attribute missing.");
                return -EINVAL;
        }

        error = ovs_nla_init_match_and_action(net, &match, &key, a,
                                              &acts, log);
        if (error)
                goto error;

        if (acts) {
                /* Can allocate before locking if have acts. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_NEW, false,
                                                ufid_flags);

                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions_rcu(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_NEW, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                        NULL, log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                                        &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

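/* Flow dump: cb->args[0] and cb->args[1] carry the (bucket, entry) cursor
 * into the current table instance across dump invocations.
 */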
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
                            OVS_FLOW_ATTR_MAX, flow_policy, NULL);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
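
/* For the NLA_UNSPEC UFID attribute above, .len is a minimum payload
 * length: a UFID must carry at least one byte.
 */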
1424
1425static const struct genl_ops dp_flow_genl_ops[] = {
1426        { .cmd = OVS_FLOW_CMD_NEW,
1427          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1428          .policy = flow_policy,
1429          .doit = ovs_flow_cmd_new
1430        },
1431        { .cmd = OVS_FLOW_CMD_DEL,
1432          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1433          .policy = flow_policy,
1434          .doit = ovs_flow_cmd_del
1435        },
1436        { .cmd = OVS_FLOW_CMD_GET,
1437          .flags = 0,               /* OK for unprivileged users. */
1438          .policy = flow_policy,
1439          .doit = ovs_flow_cmd_get,
1440          .dumpit = ovs_flow_cmd_dump
1441        },
1442        { .cmd = OVS_FLOW_CMD_SET,
1443          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1444          .policy = flow_policy,
1445          .doit = ovs_flow_cmd_set,
1446        },
1447};
1448
1449static struct genl_family dp_flow_genl_family __ro_after_init = {
1450        .hdrsize = sizeof(struct ovs_header),
1451        .name = OVS_FLOW_FAMILY,
1452        .version = OVS_FLOW_VERSION,
1453        .maxattr = OVS_FLOW_ATTR_MAX,
1454        .netnsok = true,
1455        .parallel_ops = true,
1456        .ops = dp_flow_genl_ops,
1457        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1458        .mcgrps = &ovs_dp_flow_multicast_group,
1459        .n_mcgrps = 1,
1460        .module = THIS_MODULE,
1461};
1462
1463static size_t ovs_dp_cmd_msg_size(void)
1464{
1465        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1466
1467        msgsize += nla_total_size(IFNAMSIZ);
1468        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1469        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1470        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1471
1472        return msgsize;
1473}
1474
1475/* Called with ovs_mutex. */
1476static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1477                                u32 portid, u32 seq, u32 flags, u8 cmd)
1478{
1479        struct ovs_header *ovs_header;
1480        struct ovs_dp_stats dp_stats;
1481        struct ovs_dp_megaflow_stats dp_megaflow_stats;
1482        int err;
1483
1484        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1485                                   flags, cmd);
1486        if (!ovs_header)
1487                goto error;
1488
1489        ovs_header->dp_ifindex = get_dpifindex(dp);
1490
1491        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1492        if (err)
1493                goto nla_put_failure;
1494
1495        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1496        if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1497                          &dp_stats, OVS_DP_ATTR_PAD))
1498                goto nla_put_failure;
1499
1500        if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1501                          sizeof(struct ovs_dp_megaflow_stats),
1502                          &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1503                goto nla_put_failure;
1504
1505        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1506                goto nla_put_failure;
1507
1508        genlmsg_end(skb, ovs_header);
1509        return 0;
1510
1511nla_put_failure:
1512        genlmsg_cancel(skb, ovs_header);
1513error:
1514        return -EMSGSIZE;
1515}
1516
1517static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1518{
1519        return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1520}
1521
1522/* Called with rcu_read_lock or ovs_mutex. */
1523static struct datapath *lookup_datapath(struct net *net,
1524                                        const struct ovs_header *ovs_header,
1525                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1526{
1527        struct datapath *dp;
1528
1529        if (!a[OVS_DP_ATTR_NAME]) {
1530                dp = get_dp(net, ovs_header->dp_ifindex);
1531        } else {
1532                struct vport *vport;
1533
1534                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1535                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1536        }
1537        return dp ? dp : ERR_PTR(-ENODEV);
1538}
1539
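/* Clears the user_features of the datapath addressed by @info.  Used when
 * an outdated userspace re-creates an existing datapath; see
 * ovs_dp_cmd_new().
 */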
1540static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1541{
1542        struct datapath *dp;
1543
1544        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1545        if (IS_ERR(dp))
1546                return;
1547
1548        WARN(dp->user_features, "Dropping previously announced user features\n");
1549        dp->user_features = 0;
1550}
1551
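/* Applies OVS_DP_ATTR_USER_FEATURES from a NEW or SET request, if present. */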
1552static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1553{
1554        if (a[OVS_DP_ATTR_USER_FEATURES])
1555                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1556}
1557
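/* OVS_DP_CMD_NEW handler.  The datapath, flow table, stats and port table
 * are all set up locally first; ovs_mutex is only taken to create the
 * OVSP_LOCAL vport and add the datapath to the per-netns list.
 */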
1558static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1559{
1560        struct nlattr **a = info->attrs;
1561        struct vport_parms parms;
1562        struct sk_buff *reply;
1563        struct datapath *dp;
1564        struct vport *vport;
1565        struct ovs_net *ovs_net;
1566        int err, i;
1567
1568        err = -EINVAL;
1569        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1570                goto err;
1571
1572        reply = ovs_dp_cmd_alloc_info();
1573        if (!reply)
1574                return -ENOMEM;
1575
1576        err = -ENOMEM;
1577        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1578        if (dp == NULL)
1579                goto err_free_reply;
1580
1581        ovs_dp_set_net(dp, sock_net(skb->sk));
1582
1583        /* Allocate table. */
1584        err = ovs_flow_tbl_init(&dp->table);
1585        if (err)
1586                goto err_free_dp;
1587
1588        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1589        if (!dp->stats_percpu) {
1590                err = -ENOMEM;
1591                goto err_destroy_table;
1592        }
1593
1594        dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1595                                  sizeof(struct hlist_head), GFP_KERNEL);
1596        if (!dp->ports) {
1597                err = -ENOMEM;
1598                goto err_destroy_percpu;
1599        }
1600
1601        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1602                INIT_HLIST_HEAD(&dp->ports[i]);
1603
1604        /* Set up our datapath device. */
1605        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1606        parms.type = OVS_VPORT_TYPE_INTERNAL;
1607        parms.options = NULL;
1608        parms.dp = dp;
1609        parms.port_no = OVSP_LOCAL;
1610        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1611
1612        ovs_dp_change(dp, a);
1613
1614        /* So far only local changes have been made; now we need the lock. */
1615        ovs_lock();
1616
1617        vport = new_vport(&parms);
1618        if (IS_ERR(vport)) {
1619                err = PTR_ERR(vport);
1620                if (err == -EBUSY)
1621                        err = -EEXIST;
1622
1623                if (err == -EEXIST) {
1624                        /* An outdated user space instance that does not understand
1625                         * user_features tried to create a datapath that already
1626                         * exists and will reuse it. Drop all announced user features.
1627                         */
1628                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1629                                ovs_dp_reset_user_features(skb, info);
1630                }
1631
1632                goto err_destroy_ports_array;
1633        }
1634
1635        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1636                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
1637        BUG_ON(err < 0);
1638
1639        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1640        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1641
1642        ovs_unlock();
1643
1644        ovs_notify(&dp_datapath_genl_family, reply, info);
1645        return 0;
1646
1647err_destroy_ports_array:
1648        ovs_unlock();
1649        kfree(dp->ports);
1650err_destroy_percpu:
1651        free_percpu(dp->stats_percpu);
1652err_destroy_table:
1653        ovs_flow_tbl_destroy(&dp->table);
1654err_free_dp:
1655        kfree(dp);
1656err_free_reply:
1657        kfree_skb(reply);
1658err:
1659        return err;
1660}
1661
1662/* Called with ovs_mutex. Detaches all vports and frees the datapath via RCU. */
1663static void __dp_destroy(struct datapath *dp)
1664{
1665        int i;
1666
1667        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1668                struct vport *vport;
1669                struct hlist_node *n;
1670
1671                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1672                        if (vport->port_no != OVSP_LOCAL)
1673                                ovs_dp_detach_port(vport);
1674        }
1675
1676        list_del_rcu(&dp->list_node);
1677
1678        /* OVSP_LOCAL is the datapath's internal port. All other ports must
1679         * be destroyed before the datapath itself is freed.
1680         */
1681        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1682
1683        /* Destroy the flow table and free the datapath after an RCU grace period. */
1684        call_rcu(&dp->rcu, destroy_dp_rcu);
1685}
1686
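/* OVS_DP_CMD_DEL handler.  The notification is filled in before
 * __dp_destroy() while the datapath's name and stats are still valid.
 */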
1687static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1688{
1689        struct sk_buff *reply;
1690        struct datapath *dp;
1691        int err;
1692
1693        reply = ovs_dp_cmd_alloc_info();
1694        if (!reply)
1695                return -ENOMEM;
1696
1697        ovs_lock();
1698        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1699        err = PTR_ERR(dp);
1700        if (IS_ERR(dp))
1701                goto err_unlock_free;
1702
1703        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1704                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
1705        BUG_ON(err < 0);
1706
1707        __dp_destroy(dp);
1708        ovs_unlock();
1709
1710        ovs_notify(&dp_datapath_genl_family, reply, info);
1711
1712        return 0;
1713
1714err_unlock_free:
1715        ovs_unlock();
1716        kfree_skb(reply);
1717        return err;
1718}
1719
1720static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1721{
1722        struct sk_buff *reply;
1723        struct datapath *dp;
1724        int err;
1725
1726        reply = ovs_dp_cmd_alloc_info();
1727        if (!reply)
1728                return -ENOMEM;
1729
1730        ovs_lock();
1731        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1732        err = PTR_ERR(dp);
1733        if (IS_ERR(dp))
1734                goto err_unlock_free;
1735
1736        ovs_dp_change(dp, info->attrs);
1737
1738        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1739                                   info->snd_seq, 0, OVS_DP_CMD_SET);
1740        BUG_ON(err < 0);
1741
1742        ovs_unlock();
1743        ovs_notify(&dp_datapath_genl_family, reply, info);
1744
1745        return 0;
1746
1747err_unlock_free:
1748        ovs_unlock();
1749        kfree_skb(reply);
1750        return err;
1751}
1752
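/* OVS_DP_CMD_GET handler: replies with a single datapath's attributes. */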
1753static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1754{
1755        struct sk_buff *reply;
1756        struct datapath *dp;
1757        int err;
1758
1759        reply = ovs_dp_cmd_alloc_info();
1760        if (!reply)
1761                return -ENOMEM;
1762
1763        ovs_lock();
1764        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1765        if (IS_ERR(dp)) {
1766                err = PTR_ERR(dp);
1767                goto err_unlock_free;
1768        }
1769        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1770                                   info->snd_seq, 0, OVS_DP_CMD_GET);
1771        BUG_ON(err < 0);
1772        ovs_unlock();
1773
1774        return genlmsg_reply(reply, info);
1775
1776err_unlock_free:
1777        ovs_unlock();
1778        kfree_skb(reply);
1779        return err;
1780}
1781
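/* Dumps every datapath in the netns; cb->args[0] records how many entries
 * earlier dump passes already emitted.
 */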
1782static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1783{
1784        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1785        struct datapath *dp;
1786        int skip = cb->args[0];
1787        int i = 0;
1788
1789        ovs_lock();
1790        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1791                if (i >= skip &&
1792                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1793                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1794                                         OVS_DP_CMD_GET) < 0)
1795                        break;
1796                i++;
1797        }
1798        ovs_unlock();
1799
1800        cb->args[0] = i;
1801
1802        return skb->len;
1803}
1804
1805static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1806        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1807        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1808        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1809};
1810
1811static const struct genl_ops dp_datapath_genl_ops[] = {
1812        { .cmd = OVS_DP_CMD_NEW,
1813          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1814          .policy = datapath_policy,
1815          .doit = ovs_dp_cmd_new
1816        },
1817        { .cmd = OVS_DP_CMD_DEL,
1818          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1819          .policy = datapath_policy,
1820          .doit = ovs_dp_cmd_del
1821        },
1822        { .cmd = OVS_DP_CMD_GET,
1823          .flags = 0,               /* OK for unprivileged users. */
1824          .policy = datapath_policy,
1825          .doit = ovs_dp_cmd_get,
1826          .dumpit = ovs_dp_cmd_dump
1827        },
1828        { .cmd = OVS_DP_CMD_SET,
1829          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1830          .policy = datapath_policy,
1831          .doit = ovs_dp_cmd_set,
1832        },
1833};
1834
1835static struct genl_family dp_datapath_genl_family __ro_after_init = {
1836        .hdrsize = sizeof(struct ovs_header),
1837        .name = OVS_DATAPATH_FAMILY,
1838        .version = OVS_DATAPATH_VERSION,
1839        .maxattr = OVS_DP_ATTR_MAX,
1840        .netnsok = true,
1841        .parallel_ops = true,
1842        .ops = dp_datapath_genl_ops,
1843        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1844        .mcgrps = &ovs_dp_datapath_multicast_group,
1845        .n_mcgrps = 1,
1846        .module = THIS_MODULE,
1847};
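
/* A minimal userspace exchange with this family, as a libnl-3 sketch
 * (illustrative only: error handling is omitted and "sock" is assumed to
 * be an nl_sock already connected to NETLINK_GENERIC):
 *
 *        int family = genl_ctrl_resolve(sock, OVS_DATAPATH_FAMILY);
 *        struct nl_msg *msg = nlmsg_alloc();
 *        struct ovs_header *hdr;
 *
 *        hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *                          sizeof(*hdr), NLM_F_DUMP, OVS_DP_CMD_GET,
 *                          OVS_DATAPATH_VERSION);
 *        hdr->dp_ifindex = 0;
 *        nl_send_auto(sock, msg);
 *
 * This triggers ovs_dp_cmd_dump() above, one reply per datapath.
 */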
1848
1849/* Called with ovs_mutex or RCU read lock. */
1850static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1851                                   u32 portid, u32 seq, u32 flags, u8 cmd)
1852{
1853        struct ovs_header *ovs_header;
1854        struct ovs_vport_stats vport_stats;
1855        int err;
1856
1857        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1858                                 flags, cmd);
1859        if (!ovs_header)
1860                return -EMSGSIZE;
1861
1862        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1863
1864        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1865            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1866            nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1867                           ovs_vport_name(vport)))
1868                goto nla_put_failure;
1869
1870        ovs_vport_get_stats(vport, &vport_stats);
1871        if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1872                          sizeof(struct ovs_vport_stats), &vport_stats,
1873                          OVS_VPORT_ATTR_PAD))
1874                goto nla_put_failure;
1875
1876        if (ovs_vport_get_upcall_portids(vport, skb))
1877                goto nla_put_failure;
1878
1879        err = ovs_vport_get_options(vport, skb);
1880        if (err == -EMSGSIZE)
1881                goto error;
1882
1883        genlmsg_end(skb, ovs_header);
1884        return 0;
1885
1886nla_put_failure:
1887        err = -EMSGSIZE;
1888error:
1889        genlmsg_cancel(skb, ovs_header);
1890        return err;
1891}
1892
1893static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1894{
1895        return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1896}
1897
1898/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1899struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1900                                         u32 seq, u8 cmd)
1901{
1902        struct sk_buff *skb;
1903        int retval;
1904
1905        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1906        if (!skb)
1907                return ERR_PTR(-ENOMEM);
1908
1909        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1910        BUG_ON(retval < 0);
1911
1912        return skb;
1913}
1914
1915/* Called with ovs_mutex or RCU read lock. */
1916static struct vport *lookup_vport(struct net *net,
1917                                  const struct ovs_header *ovs_header,
1918                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1919{
1920        struct datapath *dp;
1921        struct vport *vport;
1922
1923        if (a[OVS_VPORT_ATTR_NAME]) {
1924                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1925                if (!vport)
1926                        return ERR_PTR(-ENODEV);
1927                if (ovs_header->dp_ifindex &&
1928                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1929                        return ERR_PTR(-ENODEV);
1930                return vport;
1931        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1932                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1933
1934                if (port_no >= DP_MAX_PORTS)
1935                        return ERR_PTR(-EFBIG);
1936
1937                dp = get_dp(net, ovs_header->dp_ifindex);
1938                if (!dp)
1939                        return ERR_PTR(-ENODEV);
1940
1941                vport = ovs_vport_ovsl_rcu(dp, port_no);
1942                if (!vport)
1943                        return ERR_PTR(-ENODEV);
1944                return vport;
1945        }
1946        return ERR_PTR(-EINVAL);
1947}
1948
1949/* Called with ovs_mutex. Recomputes dp->max_headroom and applies it to all vports. */
1950static void update_headroom(struct datapath *dp)
1951{
1952        unsigned int dev_headroom, max_headroom = 0;
1953        struct net_device *dev;
1954        struct vport *vport;
1955        int i;
1956
1957        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1958                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1959                        dev = vport->dev;
1960                        dev_headroom = netdev_get_fwd_headroom(dev);
1961                        if (dev_headroom > max_headroom)
1962                                max_headroom = dev_headroom;
1963                }
1964        }
1965
1966        dp->max_headroom = max_headroom;
1967        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1968                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1969                        netdev_set_rx_headroom(vport->dev, max_headroom);
1970}
1971
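/* OVS_VPORT_CMD_NEW handler.  Without OVS_VPORT_ATTR_PORT_NO the lowest
 * free port number from 1 upwards is picked; -EAGAIN from new_vport()
 * means a vport module was just autoloaded, so the creation is retried.
 */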
1972static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1973{
1974        struct nlattr **a = info->attrs;
1975        struct ovs_header *ovs_header = info->userhdr;
1976        struct vport_parms parms;
1977        struct sk_buff *reply;
1978        struct vport *vport;
1979        struct datapath *dp;
1980        u32 port_no;
1981        int err;
1982
1983        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1984            !a[OVS_VPORT_ATTR_UPCALL_PID])
1985                return -EINVAL;
1986
1987        port_no = a[OVS_VPORT_ATTR_PORT_NO]
1988                ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1989        if (port_no >= DP_MAX_PORTS)
1990                return -EFBIG;
1991
1992        reply = ovs_vport_cmd_alloc_info();
1993        if (!reply)
1994                return -ENOMEM;
1995
1996        ovs_lock();
1997restart:
1998        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1999        err = -ENODEV;
2000        if (!dp)
2001                goto exit_unlock_free;
2002
2003        if (port_no) {
2004                vport = ovs_vport_ovsl(dp, port_no);
2005                err = -EBUSY;
2006                if (vport)
2007                        goto exit_unlock_free;
2008        } else {
2009                for (port_no = 1; ; port_no++) {
2010                        if (port_no >= DP_MAX_PORTS) {
2011                                err = -EFBIG;
2012                                goto exit_unlock_free;
2013                        }
2014                        vport = ovs_vport_ovsl(dp, port_no);
2015                        if (!vport)
2016                                break;
2017                }
2018        }
2019
2020        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2021        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2022        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2023        parms.dp = dp;
2024        parms.port_no = port_no;
2025        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2026
2027        vport = new_vport(&parms);
2028        err = PTR_ERR(vport);
2029        if (IS_ERR(vport)) {
2030                if (err == -EAGAIN)
2031                        goto restart;
2032                goto exit_unlock_free;
2033        }
2034
2035        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2036                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2037
2038        if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2039                update_headroom(dp);
2040        else
2041                netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2042
2043        BUG_ON(err < 0);
2044        ovs_unlock();
2045
2046        ovs_notify(&dp_vport_genl_family, reply, info);
2047        return 0;
2048
2049exit_unlock_free:
2050        ovs_unlock();
2051        kfree_skb(reply);
2052        return err;
2053}
2054
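/* OVS_VPORT_CMD_SET handler: options and upcall port ids may change, but
 * the vport type is fixed once the port has been created.
 */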
2055static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2056{
2057        struct nlattr **a = info->attrs;
2058        struct sk_buff *reply;
2059        struct vport *vport;
2060        int err;
2061
2062        reply = ovs_vport_cmd_alloc_info();
2063        if (!reply)
2064                return -ENOMEM;
2065
2066        ovs_lock();
2067        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2068        err = PTR_ERR(vport);
2069        if (IS_ERR(vport))
2070                goto exit_unlock_free;
2071
2072        if (a[OVS_VPORT_ATTR_TYPE] &&
2073            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2074                err = -EINVAL;
2075                goto exit_unlock_free;
2076        }
2077
2078        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2079                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2080                if (err)
2081                        goto exit_unlock_free;
2082        }
2083
2085        if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2086                struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2087
2088                err = ovs_vport_set_upcall_portids(vport, ids);
2089                if (err)
2090                        goto exit_unlock_free;
2091        }
2092
2093        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2094                                      info->snd_seq, 0, OVS_VPORT_CMD_SET);
2095        BUG_ON(err < 0);
2096
2097        ovs_unlock();
2098        ovs_notify(&dp_vport_genl_family, reply, info);
2099        return 0;
2100
2101exit_unlock_free:
2102        ovs_unlock();
2103        kfree_skb(reply);
2104        return err;
2105}
2106
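/* OVS_VPORT_CMD_DEL handler.  OVSP_LOCAL can only disappear together with
 * its datapath; removing the vport that had the largest headroom forces a
 * recomputation of dp->max_headroom.
 */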
2107static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2108{
2109        bool must_update_headroom = false;
2110        struct nlattr **a = info->attrs;
2111        struct sk_buff *reply;
2112        struct datapath *dp;
2113        struct vport *vport;
2114        int err;
2115
2116        reply = ovs_vport_cmd_alloc_info();
2117        if (!reply)
2118                return -ENOMEM;
2119
2120        ovs_lock();
2121        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2122        err = PTR_ERR(vport);
2123        if (IS_ERR(vport))
2124                goto exit_unlock_free;
2125
2126        if (vport->port_no == OVSP_LOCAL) {
2127                err = -EINVAL;
2128                goto exit_unlock_free;
2129        }
2130
2131        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2132                                      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2133        BUG_ON(err < 0);
2134
2135        /* Deleting this vport may require recomputing dp->max_headroom. */
2136        dp = vport->dp;
2137        if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2138                must_update_headroom = true;
2139        netdev_reset_rx_headroom(vport->dev);
2140        ovs_dp_detach_port(vport);
2141
2142        if (must_update_headroom)
2143                update_headroom(dp);
2144        ovs_unlock();
2145
2146        ovs_notify(&dp_vport_genl_family, reply, info);
2147        return 0;
2148
2149exit_unlock_free:
2150        ovs_unlock();
2151        kfree_skb(reply);
2152        return err;
2153}
2154
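/* OVS_VPORT_CMD_GET handler: read-only, so the RCU read lock stands in
 * for ovs_mutex.
 */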
2155static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2156{
2157        struct nlattr **a = info->attrs;
2158        struct ovs_header *ovs_header = info->userhdr;
2159        struct sk_buff *reply;
2160        struct vport *vport;
2161        int err;
2162
2163        reply = ovs_vport_cmd_alloc_info();
2164        if (!reply)
2165                return -ENOMEM;
2166
2167        rcu_read_lock();
2168        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2169        err = PTR_ERR(vport);
2170        if (IS_ERR(vport))
2171                goto exit_unlock_free;
2172        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2173                                      info->snd_seq, 0, OVS_VPORT_CMD_GET);
2174        BUG_ON(err < 0);
2175        rcu_read_unlock();
2176
2177        return genlmsg_reply(reply, info);
2178
2179exit_unlock_free:
2180        rcu_read_unlock();
2181        kfree_skb(reply);
2182        return err;
2183}
2184
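/* Dumps all vports of one datapath; cb->args[0] and cb->args[1] hold the
 * hash bucket and in-bucket position where the previous pass stopped.
 */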
2185static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2186{
2187        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2188        struct datapath *dp;
2189        int bucket = cb->args[0], skip = cb->args[1];
2190        int i, j = 0;
2191
2192        rcu_read_lock();
2193        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2194        if (!dp) {
2195                rcu_read_unlock();
2196                return -ENODEV;
2197        }
2198        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2199                struct vport *vport;
2200
2201                j = 0;
2202                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2203                        if (j >= skip &&
2204                            ovs_vport_cmd_fill_info(vport, skb,
2205                                                    NETLINK_CB(cb->skb).portid,
2206                                                    cb->nlh->nlmsg_seq,
2207                                                    NLM_F_MULTI,
2208                                                    OVS_VPORT_CMD_GET) < 0)
2209                                goto out;
2210
2211                        j++;
2212                }
2213                skip = 0;
2214        }
2215out:
2216        rcu_read_unlock();
2217
2218        cb->args[0] = i;
2219        cb->args[1] = j;
2220
2221        return skb->len;
2222}
2223
2224static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2225        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2226        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2227        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2228        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2229        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2230        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2231};
2232
2233static const struct genl_ops dp_vport_genl_ops[] = {
2234        { .cmd = OVS_VPORT_CMD_NEW,
2235          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2236          .policy = vport_policy,
2237          .doit = ovs_vport_cmd_new
2238        },
2239        { .cmd = OVS_VPORT_CMD_DEL,
2240          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2241          .policy = vport_policy,
2242          .doit = ovs_vport_cmd_del
2243        },
2244        { .cmd = OVS_VPORT_CMD_GET,
2245          .flags = 0,               /* OK for unprivileged users. */
2246          .policy = vport_policy,
2247          .doit = ovs_vport_cmd_get,
2248          .dumpit = ovs_vport_cmd_dump
2249        },
2250        { .cmd = OVS_VPORT_CMD_SET,
2251          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2252          .policy = vport_policy,
2253          .doit = ovs_vport_cmd_set,
2254        },
2255};
2256
2257struct genl_family dp_vport_genl_family __ro_after_init = {
2258        .hdrsize = sizeof(struct ovs_header),
2259        .name = OVS_VPORT_FAMILY,
2260        .version = OVS_VPORT_VERSION,
2261        .maxattr = OVS_VPORT_ATTR_MAX,
2262        .netnsok = true,
2263        .parallel_ops = true,
2264        .ops = dp_vport_genl_ops,
2265        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2266        .mcgrps = &ovs_dp_vport_multicast_group,
2267        .n_mcgrps = 1,
2268        .module = THIS_MODULE,
2269};
2270
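/* Every Generic Netlink family this module provides, registered and
 * unregistered as a unit.
 */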
2271static struct genl_family * const dp_genl_families[] = {
2272        &dp_datapath_genl_family,
2273        &dp_vport_genl_family,
2274        &dp_flow_genl_family,
2275        &dp_packet_genl_family,
2276};
2277
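/* Unregisters the first @n_families entries, so a partially failed
 * dp_register_genl() can unwind exactly what it registered.
 */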
2278static void dp_unregister_genl(int n_families)
2279{
2280        int i;
2281
2282        for (i = 0; i < n_families; i++)
2283                genl_unregister_family(dp_genl_families[i]);
2284}
2285
2286static int __init dp_register_genl(void)
2287{
2288        int err;
2289        int i;
2290
2291        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2292
2293                err = genl_register_family(dp_genl_families[i]);
2294                if (err)
2295                        goto error;
2296        }
2297
2298        return 0;
2299
2300error:
2301        dp_unregister_genl(i);
2302        return err;
2303}
2304
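/* Per-netns init: an empty datapath list, the notifier work item and the
 * per-netns conntrack state.
 */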
2305static int __net_init ovs_init_net(struct net *net)
2306{
2307        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2308
2309        INIT_LIST_HEAD(&ovs_net->dps);
2310        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2311        ovs_ct_init(net);
2312        return 0;
2313}
2314
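/* Collects from @net every internal vport whose backing netdevice lives
 * in the exiting namespace @dnet so that the caller can detach it.
 */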
2315static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2316                                            struct list_head *head)
2317{
2318        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2319        struct datapath *dp;
2320
2321        list_for_each_entry(dp, &ovs_net->dps, list_node) {
2322                int i;
2323
2324                for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2325                        struct vport *vport;
2326
2327                        hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2328                                if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2329                                        continue;
2330
2331                                if (dev_net(vport->dev) == dnet)
2332                                        list_add(&vport->detach_list, head);
2333                        }
2334                }
2335        }
2336}
2337
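/* Per-netns teardown: destroys the namespace's own datapaths, then
 * detaches internal vports from other namespaces whose netdevices live
 * in the one going away.
 */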
2338static void __net_exit ovs_exit_net(struct net *dnet)
2339{
2340        struct datapath *dp, *dp_next;
2341        struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2342        struct vport *vport, *vport_next;
2343        struct net *net;
2344        LIST_HEAD(head);
2345
2346        ovs_ct_exit(dnet);
2347        ovs_lock();
2348        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2349                __dp_destroy(dp);
2350
2351        rtnl_lock();
2352        for_each_net(net)
2353                list_vports_from_net(net, dnet, &head);
2354        rtnl_unlock();
2355
2356        /* Detach the collected vports whose devices live in the dying namespace. */
2357        list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2358                list_del(&vport->detach_list);
2359                ovs_dp_detach_port(vport);
2360        }
2361
2362        ovs_unlock();
2363
2364        cancel_work_sync(&ovs_net->dp_notify_work);
2365}
2366
2367static struct pernet_operations ovs_net_ops = {
2368        .init = ovs_init_net,
2369        .exit = ovs_exit_net,
2370        .id   = &ovs_net_id,
2371        .size = sizeof(struct ovs_net),
2372};
2373
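/* Module init; each successful step below is unwound in reverse order if
 * a later one fails.
 */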
2374static int __init dp_init(void)
2375{
2376        int err;
2377
2378        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2379
2380        pr_info("Open vSwitch switching datapath\n");
2381
2382        err = action_fifos_init();
2383        if (err)
2384                goto error;
2385
2386        err = ovs_internal_dev_rtnl_link_register();
2387        if (err)
2388                goto error_action_fifos_exit;
2389
2390        err = ovs_flow_init();
2391        if (err)
2392                goto error_unreg_rtnl_link;
2393
2394        err = ovs_vport_init();
2395        if (err)
2396                goto error_flow_exit;
2397
2398        err = register_pernet_device(&ovs_net_ops);
2399        if (err)
2400                goto error_vport_exit;
2401
2402        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2403        if (err)
2404                goto error_netns_exit;
2405
2406        err = ovs_netdev_init();
2407        if (err)
2408                goto error_unreg_notifier;
2409
2410        err = dp_register_genl();
2411        if (err < 0)
2412                goto error_unreg_netdev;
2413
2414        return 0;
2415
2416error_unreg_netdev:
2417        ovs_netdev_exit();
2418error_unreg_notifier:
2419        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2420error_netns_exit:
2421        unregister_pernet_device(&ovs_net_ops);
2422error_vport_exit:
2423        ovs_vport_exit();
2424error_flow_exit:
2425        ovs_flow_exit();
2426error_unreg_rtnl_link:
2427        ovs_internal_dev_rtnl_link_unregister();
2428error_action_fifos_exit:
2429        action_fifos_exit();
2430error:
2431        return err;
2432}
2433
2434static void dp_cleanup(void)
2435{
2436        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2437        ovs_netdev_exit();
2438        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2439        unregister_pernet_device(&ovs_net_ops);
2440        rcu_barrier();
2441        ovs_vport_exit();
2442        ovs_flow_exit();
2443        ovs_internal_dev_rtnl_link_unregister();
2444        action_fifos_exit();
2445}
2446
2447module_init(dp_init);
2448module_exit(dp_cleanup);
2449
2450MODULE_DESCRIPTION("Open vSwitch switching datapath");
2451MODULE_LICENSE("GPL");
2452MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2453MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2454MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2455MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2456