linux/net/openvswitch/vport.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2007-2014 Nicira, Inc.
   4 */
   5
   6#include <linux/etherdevice.h>
   7#include <linux/if.h>
   8#include <linux/if_vlan.h>
   9#include <linux/jhash.h>
  10#include <linux/kernel.h>
  11#include <linux/list.h>
  12#include <linux/mutex.h>
  13#include <linux/percpu.h>
  14#include <linux/rcupdate.h>
  15#include <linux/rtnetlink.h>
  16#include <linux/compat.h>
  17#include <net/net_namespace.h>
  18#include <linux/module.h>
  19
  20#include "datapath.h"
  21#include "vport.h"
  22#include "vport-internal_dev.h"
  23
  24static LIST_HEAD(vport_ops_list);
  25
  26/* Protected by RCU read lock for reading, ovs_mutex for writing. */
  27static struct hlist_head *dev_table;
  28#define VPORT_HASH_BUCKETS 1024
  29
  30/**
  31 *      ovs_vport_init - initialize vport subsystem
  32 *
  33 * Called at module load time to initialize the vport subsystem.
  34 */
  35int ovs_vport_init(void)
  36{
  37        dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
  38                            GFP_KERNEL);
  39        if (!dev_table)
  40                return -ENOMEM;
  41
  42        return 0;
  43}
  44
  45/**
  46 *      ovs_vport_exit - shutdown vport subsystem
  47 *
  48 * Called at module exit time to shutdown the vport subsystem.
  49 */
  50void ovs_vport_exit(void)
  51{
  52        kfree(dev_table);
  53}
  54
  55static struct hlist_head *hash_bucket(const struct net *net, const char *name)
  56{
  57        unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
  58        return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
  59}
  60
  61int __ovs_vport_ops_register(struct vport_ops *ops)
  62{
  63        int err = -EEXIST;
  64        struct vport_ops *o;
  65
  66        ovs_lock();
  67        list_for_each_entry(o, &vport_ops_list, list)
  68                if (ops->type == o->type)
  69                        goto errout;
  70
  71        list_add_tail(&ops->list, &vport_ops_list);
  72        err = 0;
  73errout:
  74        ovs_unlock();
  75        return err;
  76}
  77EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
  78
  79void ovs_vport_ops_unregister(struct vport_ops *ops)
  80{
  81        ovs_lock();
  82        list_del(&ops->list);
  83        ovs_unlock();
  84}
  85EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
  86
  87/**
  88 *      ovs_vport_locate - find a port that has already been created
  89 *
  90 * @net: network namespace
  91 * @name: name of port to find
  92 *
  93 * Must be called with ovs or RCU read lock.
  94 */
  95struct vport *ovs_vport_locate(const struct net *net, const char *name)
  96{
  97        struct hlist_head *bucket = hash_bucket(net, name);
  98        struct vport *vport;
  99
 100        hlist_for_each_entry_rcu(vport, bucket, hash_node,
 101                                 lockdep_ovsl_is_held())
 102                if (!strcmp(name, ovs_vport_name(vport)) &&
 103                    net_eq(ovs_dp_get_net(vport->dp), net))
 104                        return vport;
 105
 106        return NULL;
 107}
 108
 109/**
 110 *      ovs_vport_alloc - allocate and initialize new vport
 111 *
 112 * @priv_size: Size of private data area to allocate.
 113 * @ops: vport device ops
 114 * @parms: information about new vport.
 115 *
 116 * Allocate and initialize a new vport defined by @ops.  The vport will contain
 117 * a private data area of size @priv_size that can be accessed using
 118 * vport_priv().  Some parameters of the vport will be initialized from @parms.
 119 * @vports that are no longer needed should be released with
 120 * vport_free().
 121 */
 122struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 123                              const struct vport_parms *parms)
 124{
 125        struct vport *vport;
 126        size_t alloc_size;
 127
 128        alloc_size = sizeof(struct vport);
 129        if (priv_size) {
 130                alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
 131                alloc_size += priv_size;
 132        }
 133
 134        vport = kzalloc(alloc_size, GFP_KERNEL);
 135        if (!vport)
 136                return ERR_PTR(-ENOMEM);
 137
 138        vport->dp = parms->dp;
 139        vport->port_no = parms->port_no;
 140        vport->ops = ops;
 141        INIT_HLIST_NODE(&vport->dp_hash_node);
 142
 143        if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
 144                kfree(vport);
 145                return ERR_PTR(-EINVAL);
 146        }
 147
 148        return vport;
 149}
 150EXPORT_SYMBOL_GPL(ovs_vport_alloc);
 151
 152/**
 153 *      ovs_vport_free - uninitialize and free vport
 154 *
 155 * @vport: vport to free
 156 *
 157 * Frees a vport allocated with vport_alloc() when it is no longer needed.
 158 *
 159 * The caller must ensure that an RCU grace period has passed since the last
 160 * time @vport was in a datapath.
 161 */
 162void ovs_vport_free(struct vport *vport)
 163{
 164        /* vport is freed from RCU callback or error path, Therefore
 165         * it is safe to use raw dereference.
 166         */
 167        kfree(rcu_dereference_raw(vport->upcall_portids));
 168        kfree(vport);
 169}
 170EXPORT_SYMBOL_GPL(ovs_vport_free);
 171
 172static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
 173{
 174        struct vport_ops *ops;
 175
 176        list_for_each_entry(ops, &vport_ops_list, list)
 177                if (ops->type == parms->type)
 178                        return ops;
 179
 180        return NULL;
 181}
 182
 183/**
 184 *      ovs_vport_add - add vport device (for kernel callers)
 185 *
 186 * @parms: Information about new vport.
 187 *
 188 * Creates a new vport with the specified configuration (which is dependent on
 189 * device type).  ovs_mutex must be held.
 190 */
 191struct vport *ovs_vport_add(const struct vport_parms *parms)
 192{
 193        struct vport_ops *ops;
 194        struct vport *vport;
 195
 196        ops = ovs_vport_lookup(parms);
 197        if (ops) {
 198                struct hlist_head *bucket;
 199
 200                if (!try_module_get(ops->owner))
 201                        return ERR_PTR(-EAFNOSUPPORT);
 202
 203                vport = ops->create(parms);
 204                if (IS_ERR(vport)) {
 205                        module_put(ops->owner);
 206                        return vport;
 207                }
 208
 209                bucket = hash_bucket(ovs_dp_get_net(vport->dp),
 210                                     ovs_vport_name(vport));
 211                hlist_add_head_rcu(&vport->hash_node, bucket);
 212                return vport;
 213        }
 214
 215        /* Unlock to attempt module load and return -EAGAIN if load
 216         * was successful as we need to restart the port addition
 217         * workflow.
 218         */
 219        ovs_unlock();
 220        request_module("vport-type-%d", parms->type);
 221        ovs_lock();
 222
 223        if (!ovs_vport_lookup(parms))
 224                return ERR_PTR(-EAFNOSUPPORT);
 225        else
 226                return ERR_PTR(-EAGAIN);
 227}
 228
 229/**
 230 *      ovs_vport_set_options - modify existing vport device (for kernel callers)
 231 *
 232 * @vport: vport to modify.
 233 * @options: New configuration.
 234 *
 235 * Modifies an existing device with the specified configuration (which is
 236 * dependent on device type).  ovs_mutex must be held.
 237 */
 238int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
 239{
 240        if (!vport->ops->set_options)
 241                return -EOPNOTSUPP;
 242        return vport->ops->set_options(vport, options);
 243}
 244
 245/**
 246 *      ovs_vport_del - delete existing vport device
 247 *
 248 * @vport: vport to delete.
 249 *
 250 * Detaches @vport from its datapath and destroys it.  ovs_mutex must
 251 * be held.
 252 */
 253void ovs_vport_del(struct vport *vport)
 254{
 255        hlist_del_rcu(&vport->hash_node);
 256        module_put(vport->ops->owner);
 257        vport->ops->destroy(vport);
 258}
 259
 260/**
 261 *      ovs_vport_get_stats - retrieve device stats
 262 *
 263 * @vport: vport from which to retrieve the stats
 264 * @stats: location to store stats
 265 *
 266 * Retrieves transmit, receive, and error stats for the given device.
 267 *
 268 * Must be called with ovs_mutex or rcu_read_lock.
 269 */
 270void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
 271{
 272        const struct rtnl_link_stats64 *dev_stats;
 273        struct rtnl_link_stats64 temp;
 274
 275        dev_stats = dev_get_stats(vport->dev, &temp);
 276        stats->rx_errors  = dev_stats->rx_errors;
 277        stats->tx_errors  = dev_stats->tx_errors;
 278        stats->tx_dropped = dev_stats->tx_dropped;
 279        stats->rx_dropped = dev_stats->rx_dropped;
 280
 281        stats->rx_bytes   = dev_stats->rx_bytes;
 282        stats->rx_packets = dev_stats->rx_packets;
 283        stats->tx_bytes   = dev_stats->tx_bytes;
 284        stats->tx_packets = dev_stats->tx_packets;
 285}
 286
 287/**
 288 *      ovs_vport_get_options - retrieve device options
 289 *
 290 * @vport: vport from which to retrieve the options.
 291 * @skb: sk_buff where options should be appended.
 292 *
 293 * Retrieves the configuration of the given device, appending an
 294 * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
 295 * vport-specific attributes to @skb.
 296 *
 297 * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
 298 * negative error code if a real error occurred.  If an error occurs, @skb is
 299 * left unmodified.
 300 *
 301 * Must be called with ovs_mutex or rcu_read_lock.
 302 */
 303int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
 304{
 305        struct nlattr *nla;
 306        int err;
 307
 308        if (!vport->ops->get_options)
 309                return 0;
 310
 311        nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS);
 312        if (!nla)
 313                return -EMSGSIZE;
 314
 315        err = vport->ops->get_options(vport, skb);
 316        if (err) {
 317                nla_nest_cancel(skb, nla);
 318                return err;
 319        }
 320
 321        nla_nest_end(skb, nla);
 322        return 0;
 323}
 324
 325/**
 326 *      ovs_vport_set_upcall_portids - set upcall portids of @vport.
 327 *
 328 * @vport: vport to modify.
 329 * @ids: new configuration, an array of port ids.
 330 *
 331 * Sets the vport's upcall_portids to @ids.
 332 *
 333 * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
 334 * as an array of U32.
 335 *
 336 * Must be called with ovs_mutex.
 337 */
 338int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 339{
 340        struct vport_portids *old, *vport_portids;
 341
 342        if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
 343                return -EINVAL;
 344
 345        old = ovsl_dereference(vport->upcall_portids);
 346
 347        vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
 348                                GFP_KERNEL);
 349        if (!vport_portids)
 350                return -ENOMEM;
 351
 352        vport_portids->n_ids = nla_len(ids) / sizeof(u32);
 353        vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
 354        nla_memcpy(vport_portids->ids, ids, nla_len(ids));
 355
 356        rcu_assign_pointer(vport->upcall_portids, vport_portids);
 357
 358        if (old)
 359                kfree_rcu(old, rcu);
 360        return 0;
 361}
 362
 363/**
 364 *      ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
 365 *
 366 * @vport: vport from which to retrieve the portids.
 367 * @skb: sk_buff where portids should be appended.
 368 *
 369 * Retrieves the configuration of the given vport, appending the
 370 * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
 371 * portids to @skb.
 372 *
 373 * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
 374 * If an error occurs, @skb is left unmodified.  Must be called with
 375 * ovs_mutex or rcu_read_lock.
 376 */
 377int ovs_vport_get_upcall_portids(const struct vport *vport,
 378                                 struct sk_buff *skb)
 379{
 380        struct vport_portids *ids;
 381
 382        ids = rcu_dereference_ovsl(vport->upcall_portids);
 383
 384        if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
 385                return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
 386                               ids->n_ids * sizeof(u32), (void *)ids->ids);
 387        else
 388                return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
 389}
 390
 391/**
 392 *      ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
 393 *
 394 * @vport: vport from which the missed packet is received.
 395 * @skb: skb that the missed packet was received.
 396 *
 397 * Uses the skb_get_hash() to select the upcall portid to send the
 398 * upcall.
 399 *
 400 * Returns the portid of the target socket.  Must be called with rcu_read_lock.
 401 */
 402u32 ovs_vport_find_upcall_portid(const struct vport *vport,
 403                                 struct sk_buff *skb)
 404{
 405        struct vport_portids *ids;
 406        u32 ids_index;
 407        u32 hash;
 408
 409        ids = rcu_dereference(vport->upcall_portids);
 410
 411        /* If there is only one portid, select it in the fast-path. */
 412        if (ids->n_ids == 1)
 413                return ids->ids[0];
 414
 415        hash = skb_get_hash(skb);
 416        ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
 417        return ids->ids[ids_index];
 418}
 419
 420/**
 421 *      ovs_vport_receive - pass up received packet to the datapath for processing
 422 *
 423 * @vport: vport that received the packet
 424 * @skb: skb that was received
 425 * @tun_info: tunnel (if any) that carried packet
 426 *
 427 * Must be called with rcu_read_lock.  The packet cannot be shared and
 428 * skb->data should point to the Ethernet header.
 429 */
 430int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 431                      const struct ip_tunnel_info *tun_info)
 432{
 433        struct sw_flow_key key;
 434        int error;
 435
 436        OVS_CB(skb)->input_vport = vport;
 437        OVS_CB(skb)->mru = 0;
 438        OVS_CB(skb)->cutlen = 0;
 439        if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
 440                u32 mark;
 441
 442                mark = skb->mark;
 443                skb_scrub_packet(skb, true);
 444                skb->mark = mark;
 445                tun_info = NULL;
 446        }
 447
 448        /* Extract flow from 'skb' into 'key'. */
 449        error = ovs_flow_key_extract(tun_info, skb, &key);
 450        if (unlikely(error)) {
 451                kfree_skb(skb);
 452                return error;
 453        }
 454        ovs_dp_process_packet(skb, &key);
 455        return 0;
 456}
 457
 458static int packet_length(const struct sk_buff *skb,
 459                         struct net_device *dev)
 460{
 461        int length = skb->len - dev->hard_header_len;
 462
 463        if (!skb_vlan_tag_present(skb) &&
 464            eth_type_vlan(skb->protocol))
 465                length -= VLAN_HLEN;
 466
 467        /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
 468         * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
 469         * account for 802.1ad. e.g. is_skb_forwardable().
 470         */
 471
 472        return length > 0 ? length : 0;
 473}
 474
 475void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 476{
 477        int mtu = vport->dev->mtu;
 478
 479        switch (vport->dev->type) {
 480        case ARPHRD_NONE:
 481                if (mac_proto == MAC_PROTO_ETHERNET) {
 482                        skb_reset_network_header(skb);
 483                        skb_reset_mac_len(skb);
 484                        skb->protocol = htons(ETH_P_TEB);
 485                } else if (mac_proto != MAC_PROTO_NONE) {
 486                        WARN_ON_ONCE(1);
 487                        goto drop;
 488                }
 489                break;
 490        case ARPHRD_ETHER:
 491                if (mac_proto != MAC_PROTO_ETHERNET)
 492                        goto drop;
 493                break;
 494        default:
 495                goto drop;
 496        }
 497
 498        if (unlikely(packet_length(skb, vport->dev) > mtu &&
 499                     !skb_is_gso(skb))) {
 500                vport->dev->stats.tx_errors++;
 501                if (vport->dev->flags & IFF_UP)
 502                        net_warn_ratelimited("%s: dropped over-mtu packet: "
 503                                             "%d > %d\n", vport->dev->name,
 504                                             packet_length(skb, vport->dev),
 505                                             mtu);
 506                goto drop;
 507        }
 508
 509        skb->dev = vport->dev;
 510        skb->tstamp = 0;
 511        vport->ops->send(skb);
 512        return;
 513
 514drop:
 515        kfree_skb(skb);
 516}
 517