linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"

/**
 * struct ovs_net - Per net-namespace data for ovs.
 * @dps: List of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
struct ovs_net {
        struct list_head dps;
};

static int ovs_net_id __read_mostly;

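/* The flow table is rehashed from a delayed work item every
 * REHASH_FLOW_INTERVAL (ten minutes).
 */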
#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
 * genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 */

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct net *, int dp_ifindex,
                                  struct sk_buff *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, dp_ifindex);
        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

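/* Returns the ifindex of the datapath's local port, or 0 if the local
 * port is missing.
 */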
static int get_dpifindex(struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->ops->get_ifindex(local);
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

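/* RCU callback: runs after a grace period, when no readers can still
 * hold a reference to 'dp', so its memory can be freed safely.
 */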
static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

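/* DP_VPORT_HASH_BUCKETS is a power of two, so masking the port number
 * yields the bucket index directly.
 */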
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

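/* Called with rcu_read_lock or RTNL lock. */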
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }

        return vport;
}

/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
        struct sw_flow_key key;
        u64 *stats_counter;
        int error;
        int key_len;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Extract flow from 'skb' into 'key'. */
        error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
        }

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.key = &key;
                upcall.userdata = NULL;
                upcall.portid = p->upcall_portid;
                ovs_dp_upcall(dp, skb, &upcall);
                consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        OVS_CB(skb)->flow = flow;

        stats_counter = &stats->n_hit;
        ovs_flow_used(OVS_CB(skb)->flow, skb);
        ovs_execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->sync);
        (*stats_counter)++;
        u64_stats_update_end(&stats->sync);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true
};

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int dp_ifindex;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex) {
                err = -ENODEV;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        else
                err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->sync);
        stats->n_lost++;
        u64_stats_update_end(&stats->sync);

        return err;
}

static int queue_gso_packets(struct net *net, int dp_ifindex,
                             struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
                if (err)
                        break;

                if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for the first fragment, so we need
                         * to properly mark later fragments.
                         */
                        later_key = *upcall_info->key;
                        later_key.ip.frag = OVS_FRAG_TYPE_LATER;

                        later_info = *upcall_info;
                        later_info.key = &later_key;
                        upcall_info = &later_info;
                }
        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static int queue_userspace_packet(struct net *net, int dp_ifindex,
                                  struct sk_buff *skb,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb; /* to be queued to userspace */
        struct nlattr *nla;
        unsigned int len;
        int err;

        if (vlan_tx_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                nskb->vlan_tci = 0;
                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        len = sizeof(struct ovs_header);
        len += nla_total_size(skb->len);
        len += nla_total_size(FLOW_BUFSIZE);
        if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
                len += nla_total_size(8);

        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
        ovs_flow_to_nlattrs(upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);

        if (upcall_info->userdata)
                nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
                            nla_get_u64(upcall_info->userdata));

        nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

        skb_copy_and_csum_dev(skb, nla_data(nla));

        genlmsg_end(user_skb, upcall);
        err = genlmsg_unicast(net, user_skb, upcall_info->portid);

out:
        kfree_skb(nskb);
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(struct datapath *dp)
{
        struct flow_table *old_table;
        struct flow_table *new_table;

        old_table = genl_dereference(dp->table);
        new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        ovs_flow_tbl_deferred_destroy(old_table);
        return 0;
}

static int validate_actions(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth);

static int validate_sample(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth)
{
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
        const struct nlattr *a;
        int rem;

        memset(attrs, 0, sizeof(attrs));
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
                        return -EINVAL;
                attrs[type] = a;
        }
        if (rem)
                return -EINVAL;

        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
        if (!probability || nla_len(probability) != sizeof(u32))
                return -EINVAL;

        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
                return -EINVAL;
        return validate_actions(actions, key, depth + 1);
}

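/* A set action on TCP or UDP ports is only valid if the flow key
 * actually carries L4 ports for the corresponding network protocol.
 */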
static int validate_tp_port(const struct sw_flow_key *flow_key)
{
        if (flow_key->eth.type == htons(ETH_P_IP)) {
                if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
                        return 0;
        } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
                if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
                        return 0;
        }

        return -EINVAL;
}

static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key)
{
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);

        /* There can be only one key in an action */
        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
                return -EINVAL;

        if (key_type > OVS_KEY_ATTR_MAX ||
            nla_len(ovs_key) != ovs_key_lens[key_type])
                return -EINVAL;

        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;
        const struct ovs_key_ipv6 *ipv6_key;

        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_SKB_MARK:
        case OVS_KEY_ATTR_ETHERNET:
                break;

        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv4_key = nla_data(ovs_key);
                if (ipv4_key->ipv4_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv4_key->ipv4_frag != flow_key->ip.frag)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_IPV6:
                if (flow_key->eth.type != htons(ETH_P_IPV6))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv6_key = nla_data(ovs_key);
                if (ipv6_key->ipv6_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv6_key->ipv6_frag != flow_key->ip.frag)
                        return -EINVAL;

                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_TCP:
                if (flow_key->ip.proto != IPPROTO_TCP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        default:
                return -EINVAL;
        }

        return 0;
}

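/* An OVS_ACTION_ATTR_USERSPACE action must name a nonzero Netlink port
 * to receive the upcall.
 */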
static int validate_userspace(const struct nlattr *attr)
{
        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
        };
        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
        int error;

        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
                                 attr, userspace_policy);
        if (error)
                return error;

        if (!a[OVS_USERSPACE_ATTR_PID] ||
            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
                return -EINVAL;

        return 0;
}

static int validate_actions(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth)
{
        const struct nlattr *a;
        int rem, err;

        if (depth >= SAMPLE_ACTION_DEPTH)
                return -EOVERFLOW;

        nla_for_each_nested(a, attr, rem) {
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);

                if (type > OVS_ACTION_ATTR_MAX ||
                    (action_lens[type] != nla_len(a) &&
                     action_lens[type] != (u32)-1))
                        return -EINVAL;

                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case OVS_ACTION_ATTR_USERSPACE:
                        err = validate_userspace(a);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        vlan = nla_data(a);
                        if (vlan->vlan_tpid != htons(ETH_P_8021Q))
                                return -EINVAL;
                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = validate_sample(a, key, depth);
                        if (err)
                                return err;
                        break;

                default:
                        return -EINVAL;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

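/* Resets a flow's statistics.  Callers either hold flow->lock or are
 * the only ones holding a reference to the flow.
 */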
static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        int len;
        int err;
        int key_len;

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS] ||
            nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
        if (err)
                goto err_flow_free;

        err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
                                             &flow->key.phy.skb_mark,
                                             &flow->key.phy.in_port,
                                             a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;

        err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
        if (err)
                goto err_flow_free;

        flow->hash = ovs_flow_hash(&flow->key, key_len);

        acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;
        rcu_assign_pointer(flow->sf_acts, acts);

        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        local_bh_disable();
        err = ovs_execute_actions(dp, packet);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

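/* Sums the per-CPU datapath counters.  The u64_stats fetch/retry loop
 * keeps the 64-bit reads consistent even on 32-bit SMP systems.
 */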
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
        int i;
        struct flow_table *table = genl_dereference(dp->table);

        stats->n_flows = ovs_flow_tbl_count(table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true
};

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct ovs_flow_stats stats;
        struct ovs_header *ovs_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = ovs_flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                goto nla_put_failure;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS,
                    sizeof(struct ovs_flow_stats), &stats))
                goto nla_put_failure;

        if (tcp_flags &&
            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
                goto nla_put_failure;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
                      sf_acts->actions);
        if (err < 0 && skb_orig_len)
                goto error;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        /* OVS_FLOW_ATTR_KEY */
        len = nla_total_size(FLOW_BUFSIZE);
        /* OVS_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sf_acts->actions_len);
        /* OVS_FLOW_ATTR_STATS */
        len += nla_total_size(sizeof(struct ovs_flow_stats));
        /* OVS_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(1);
        /* OVS_FLOW_ATTR_USED */
        len += nla_total_size(8);

        len += NLMSG_ALIGN(sizeof(struct ovs_header));

        return genlmsg_new(len, GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
                                               struct datapath *dp,
                                               u32 portid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
        BUG_ON(retval < 0);
        return skb;
}

static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct flow_table *table;
        int error;
        int key_len;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY])
                goto error;
        error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (ovs_flow_tbl_need_to_expand(table)) {
                        struct flow_table *new_table;

                        new_table = ovs_flow_tbl_expand(table);
                        if (!IS_ERR(new_table)) {
                                rcu_assign_pointer(dp->table, new_table);
                                ovs_flow_tbl_deferred_destroy(table);
                                table = genl_dereference(dp->table);
                        }
                }

                /* Allocate flow. */
                flow = ovs_flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                flow->hash = ovs_flow_hash(&key, key_len);
                ovs_flow_tbl_insert(table, flow);

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                                info->snd_seq,
                                                OVS_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;
                struct nlattr *acts_attrs;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
                if (acts_attrs &&
                   (old_acts->actions_len != nla_len(acts_attrs) ||
                   memcmp(old_acts->actions, nla_data(acts_attrs),
                          old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = ovs_flow_actions_alloc(acts_attrs);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        ovs_flow_deferred_free_acts(old_acts);
                }

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                               info->snd_seq, OVS_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[OVS_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_portid,
                           ovs_dp_flow_multicast_group.id, info->nlhdr,
                           GFP_KERNEL);
        else
                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
                                ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        ovs_flow_free(flow);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        int err;
        int key_len;

        if (!a[OVS_FLOW_ATTR_KEY])
                return -EINVAL;
        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow)
                return -ENOENT;

        reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                        info->snd_seq, OVS_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        int err;
        int key_len;

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        if (!a[OVS_FLOW_ATTR_KEY])
                return flush_flows(dp);

        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (err)
                return err;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow)
                return -ENOENT;

        reply = ovs_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        ovs_flow_tbl_remove(table, flow);

        err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
                                     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        ovs_flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

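/* Dumps flows one table entry at a time; cb->args[0] and cb->args[1]
 * record the bucket and entry where the previous pass stopped.
 */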
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        struct flow_table *table;

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);

        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_next(table, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, dp, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new_or_set
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
        .maxattr = OVS_DP_ATTR_MAX,
        .netnsok = true
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP
};

static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                   flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        rcu_read_lock();
        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
                goto nla_put_failure;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct net *net,
                                        struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        struct ovs_net *ovs_net;
        int err, i;

        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;

        rtnl_lock();

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_unlock_rtnl;

        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_table;
        }

        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
                        GFP_KERNEL);
        if (!dp->ports) {
                err = -ENOMEM;
                goto err_destroy_percpu;
        }

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
                INIT_HLIST_HEAD(&dp->ports[i]);

        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
        parms.type = OVS_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);

        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_ports_array;
        }

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
        list_add_tail(&dp->list_node, &ovs_net->dps);
        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);
        return 0;

err_destroy_local_port:
        ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
err_destroy_ports_array:
        kfree(dp->ports);
err_destroy_percpu:
        free_percpu(dp->stats_percpu);
err_destroy_table:
        ovs_flow_tbl_destroy(genl_dereference(dp->table));
err_free_dp:
        release_net(ovs_dp_get_net(dp));
        kfree(dp);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}

/* Called with genl_mutex. */
static void __dp_destroy(struct datapath *dp)
{
        int i;

        rtnl_lock();

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
                struct vport *vport;
                struct hlist_node *n;

                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
                        if (vport->port_no != OVSP_LOCAL)
                                ovs_dp_detach_port(vport);
        }

        list_del(&dp->list_node);
        ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));

        /* rtnl_unlock() will wait until all the references to devices that
         * are pending unregistration have been dropped.  We do it here to
         * ensure that any internal devices (which contain DP pointers) are
         * fully destroyed before freeing the datapath.
         */
        rtnl_unlock();

        call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                return err;

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                return err;

        __dp_destroy(dp);

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);

        return 0;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
                                ovs_dp_datapath_multicast_group.id, err);
                return 0;
        }

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);

        return 0;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

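/* Dumps all datapaths in the net namespace; cb->args[0] counts how
 * many entries previous passes already emitted.
 */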
1478static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1479{
1480        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1481        struct datapath *dp;
1482        int skip = cb->args[0];
1483        int i = 0;
1484
1485        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1486                if (i >= skip &&
1487                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1488                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1489                                         OVS_DP_CMD_NEW) < 0)
1490                        break;
1491                i++;
1492        }
1493
1494        cb->args[0] = i;
1495
1496        return skb->len;
1497}
1498
1499static struct genl_ops dp_datapath_genl_ops[] = {
1500        { .cmd = OVS_DP_CMD_NEW,
1501          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1502          .policy = datapath_policy,
1503          .doit = ovs_dp_cmd_new
1504        },
1505        { .cmd = OVS_DP_CMD_DEL,
1506          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1507          .policy = datapath_policy,
1508          .doit = ovs_dp_cmd_del
1509        },
1510        { .cmd = OVS_DP_CMD_GET,
1511          .flags = 0,               /* OK for unprivileged users. */
1512          .policy = datapath_policy,
1513          .doit = ovs_dp_cmd_get,
1514          .dumpit = ovs_dp_cmd_dump
1515        },
1516        { .cmd = OVS_DP_CMD_SET,
1517          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1518          .policy = datapath_policy,
1519          .doit = ovs_dp_cmd_set,
1520        },
1521};
1522
1523static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1524        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1525        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1526        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1527        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1528        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1529        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1530};
1531
1532static struct genl_family dp_vport_genl_family = {
1533        .id = GENL_ID_GENERATE,
1534        .hdrsize = sizeof(struct ovs_header),
1535        .name = OVS_VPORT_FAMILY,
1536        .version = OVS_VPORT_VERSION,
1537        .maxattr = OVS_VPORT_ATTR_MAX,
1538        .netnsok = true
1539};
1540
1541struct genl_multicast_group ovs_dp_vport_multicast_group = {
1542        .name = OVS_VPORT_MCGROUP
1543};
1544
1545/* Called with RTNL lock or RCU read lock. */
1546static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1547                                   u32 portid, u32 seq, u32 flags, u8 cmd)
1548{
1549        struct ovs_header *ovs_header;
1550        struct ovs_vport_stats vport_stats;
1551        int err;
1552
1553        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1554                                 flags, cmd);
1555        if (!ovs_header)
1556                return -EMSGSIZE;
1557
1558        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1559
1560        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1561            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1562            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1563            nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
1564                goto nla_put_failure;
1565
1566        ovs_vport_get_stats(vport, &vport_stats);
1567        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1568                    &vport_stats))
1569                goto nla_put_failure;
1570
1571        err = ovs_vport_get_options(vport, skb);
1572        if (err == -EMSGSIZE)
1573                goto error;
1574
1575        return genlmsg_end(skb, ovs_header);
1576
1577nla_put_failure:
1578        err = -EMSGSIZE;
1579error:
1580        genlmsg_cancel(skb, ovs_header);
1581        return err;
1582}
1583
1584/* Called with RTNL lock or RCU read lock. */
1585struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1586                                         u32 seq, u8 cmd)
1587{
1588        struct sk_buff *skb;
1589        int retval;
1590
1591        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1592        if (!skb)
1593                return ERR_PTR(-ENOMEM);
1594
1595        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
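        /*
         * A freshly allocated skb of NLMSG_DEFAULT_SIZE is assumed always
         * to have room for a single vport's attributes, so a failure here
         * would be a programming error, not a runtime condition.
         */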
1596        BUG_ON(retval < 0);
1597
1598        return skb;
1599}
1600
1601/* Called with RTNL lock or RCU read lock. */
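/*
 * Resolve a request to a vport either by OVS_VPORT_ATTR_NAME, which is
 * unique within a net namespace (a zero dp_ifindex then matches any
 * datapath), or by dp_ifindex plus OVS_VPORT_ATTR_PORT_NO.  Returns
 * -ENODEV if the named port or the datapath does not exist, -EFBIG for an
 * out-of-range port number, -ENOENT for an unused port number, and
 * -EINVAL if neither lookup key was supplied.
 */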
1602static struct vport *lookup_vport(struct net *net,
1603                                  struct ovs_header *ovs_header,
1604                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1605{
1606        struct datapath *dp;
1607        struct vport *vport;
1608
1609        if (a[OVS_VPORT_ATTR_NAME]) {
1610                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1611                if (!vport)
1612                        return ERR_PTR(-ENODEV);
1613                if (ovs_header->dp_ifindex &&
1614                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1615                        return ERR_PTR(-ENODEV);
1616                return vport;
1617        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1618                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1619
1620                if (port_no >= DP_MAX_PORTS)
1621                        return ERR_PTR(-EFBIG);
1622
1623                dp = get_dp(net, ovs_header->dp_ifindex);
1624                if (!dp)
1625                        return ERR_PTR(-ENODEV);
1626
1627                vport = ovs_vport_rtnl_rcu(dp, port_no);
1628                if (!vport)
1629                        return ERR_PTR(-ENOENT);
1630                return vport;
1631        } else
1632                return ERR_PTR(-EINVAL);
1633}
1634
1635static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1636{
1637        struct nlattr **a = info->attrs;
1638        struct ovs_header *ovs_header = info->userhdr;
1639        struct vport_parms parms;
1640        struct sk_buff *reply;
1641        struct vport *vport;
1642        struct datapath *dp;
1643        u32 port_no;
1644        int err;
1645
1646        err = -EINVAL;
1647        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1648            !a[OVS_VPORT_ATTR_UPCALL_PID])
1649                goto exit;
1650
1651        rtnl_lock();
1652        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1653        err = -ENODEV;
1654        if (!dp)
1655                goto exit_unlock;
1656
1657        if (a[OVS_VPORT_ATTR_PORT_NO]) {
1658                port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1659
1660                err = -EFBIG;
1661                if (port_no >= DP_MAX_PORTS)
1662                        goto exit_unlock;
1663
1664                vport = ovs_vport_rtnl_rcu(dp, port_no);
1665                err = -EBUSY;
1666                if (vport)
1667                        goto exit_unlock;
1668        } else {
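                /*
                 * No port number was requested: scan for the lowest free
                 * one.  Numbering starts at 1 because port 0 (OVSP_LOCAL)
                 * is reserved for the datapath's local port.
                 */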
1669                for (port_no = 1; ; port_no++) {
1670                        if (port_no >= DP_MAX_PORTS) {
1671                                err = -EFBIG;
1672                                goto exit_unlock;
1673                        }
1674                        vport = ovs_vport_rtnl(dp, port_no);
1675                        if (!vport)
1676                                break;
1677                }
1678        }
1679
1680        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1681        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1682        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1683        parms.dp = dp;
1684        parms.port_no = port_no;
1685        parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1686
1687        vport = new_vport(&parms);
1688        err = PTR_ERR(vport);
1689        if (IS_ERR(vport))
1690                goto exit_unlock;
1691
1692        err = 0;
1693        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1694                                         OVS_VPORT_CMD_NEW);
1695        if (IS_ERR(reply)) {
1696                err = PTR_ERR(reply);
1697                ovs_dp_detach_port(vport);
1698                goto exit_unlock;
1699        }
1700        genl_notify(reply, genl_info_net(info), info->snd_portid,
1701                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1702
1703exit_unlock:
1704        rtnl_unlock();
1705exit:
1706        return err;
1707}
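
/*
 * A minimal OVS_VPORT_CMD_NEW request therefore carries at least
 * OVS_VPORT_ATTR_NAME, OVS_VPORT_ATTR_TYPE and OVS_VPORT_ATTR_UPCALL_PID,
 * plus the target datapath's ifindex in the ovs_header;
 * OVS_VPORT_ATTR_PORT_NO and OVS_VPORT_ATTR_OPTIONS are optional.
 */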
1708
1709static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1710{
1711        struct nlattr **a = info->attrs;
1712        struct sk_buff *reply;
1713        struct vport *vport;
1714        int err;
1715
1716        rtnl_lock();
1717        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1718        err = PTR_ERR(vport);
1719        if (IS_ERR(vport))
1720                goto exit_unlock;
1721
1722        err = 0;
1723        if (a[OVS_VPORT_ATTR_TYPE] &&
1724            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
1725                err = -EINVAL;
1726
1727        reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1728        if (!reply) {
1729                err = -ENOMEM;
1730                goto exit_unlock;
1731        }
1732
1733        if (!err && a[OVS_VPORT_ATTR_OPTIONS])
1734                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1735        if (err)
1736                goto exit_free;
1737
1738        if (a[OVS_VPORT_ATTR_UPCALL_PID])
1739                vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1740
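        /*
         * As in ovs_vport_cmd_build_info(), a single vport is assumed to
         * fit in the NLMSG_DEFAULT_SIZE reply, so a fill failure here
         * would indicate a bug.
         */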
1741        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1742                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1743        BUG_ON(err < 0);
1744
1745        genl_notify(reply, genl_info_net(info), info->snd_portid,
1746                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1747
1748        rtnl_unlock();
1749        return 0;
1750
1751exit_free:
1752        kfree_skb(reply);
1753exit_unlock:
1754        rtnl_unlock();
1755        return err;
1756}
1757
1758static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1759{
1760        struct nlattr **a = info->attrs;
1761        struct sk_buff *reply;
1762        struct vport *vport;
1763        int err;
1764
1765        rtnl_lock();
1766        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1767        err = PTR_ERR(vport);
1768        if (IS_ERR(vport))
1769                goto exit_unlock;
1770
1771        if (vport->port_no == OVSP_LOCAL) {
1772                err = -EINVAL;
1773                goto exit_unlock;
1774        }
1775
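        /*
         * Build the notification before detaching, while the vport's
         * name, stats and options can still be read safely.
         */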
1776        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1777                                         OVS_VPORT_CMD_DEL);
1778        err = PTR_ERR(reply);
1779        if (IS_ERR(reply))
1780                goto exit_unlock;
1781
1782        err = 0;
1783        ovs_dp_detach_port(vport);
1784
1785        genl_notify(reply, genl_info_net(info), info->snd_portid,
1786                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1787
1788exit_unlock:
1789        rtnl_unlock();
1790        return err;
1791}
1792
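/*
 * The GET command is read-only, so unlike NEW/SET/DEL it runs under the
 * RCU read lock rather than RTNL; this is also why
 * ovs_vport_cmd_build_info() must allocate its reply with GFP_ATOMIC.
 */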
1793static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1794{
1795        struct nlattr **a = info->attrs;
1796        struct ovs_header *ovs_header = info->userhdr;
1797        struct sk_buff *reply;
1798        struct vport *vport;
1799        int err;
1800
1801        rcu_read_lock();
1802        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
1803        err = PTR_ERR(vport);
1804        if (IS_ERR(vport))
1805                goto exit_unlock;
1806
1807        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1808                                         OVS_VPORT_CMD_NEW);
1809        err = PTR_ERR(reply);
1810        if (IS_ERR(reply))
1811                goto exit_unlock;
1812
1813        rcu_read_unlock();
1814
1815        return genlmsg_reply(reply, info);
1816
1817exit_unlock:
1818        rcu_read_unlock();
1819        return err;
1820}
1821
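/*
 * Vport dumps keep a two-level cursor: cb->args[0] is the hash bucket to
 * resume from and cb->args[1] the number of entries already emitted in
 * that bucket.  'skip' is zeroed after the first bucket so that only the
 * interrupted bucket is partially skipped.  As with any netlink dump over
 * an RCU hash table, entries added or removed mid-dump may be missed or
 * seen twice; userspace is expected to tolerate that.
 */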
1822static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1823{
1824        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1825        struct datapath *dp;
1826        int bucket = cb->args[0], skip = cb->args[1];
1827        int i, j = 0;
1828
1829        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1830        if (!dp)
1831                return -ENODEV;
1832
1833        rcu_read_lock();
1834        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
1835                struct vport *vport;
1836
1837                j = 0;
1838                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1839                        if (j >= skip &&
1840                            ovs_vport_cmd_fill_info(vport, skb,
1841                                                    NETLINK_CB(cb->skb).portid,
1842                                                    cb->nlh->nlmsg_seq,
1843                                                    NLM_F_MULTI,
1844                                                    OVS_VPORT_CMD_NEW) < 0)
1845                                goto out;
1846
1847                        j++;
1848                }
1849                skip = 0;
1850        }
1851out:
1852        rcu_read_unlock();
1853
1854        cb->args[0] = i;
1855        cb->args[1] = j;
1856
1857        return skb->len;
1858}
1859
1860static struct genl_ops dp_vport_genl_ops[] = {
1861        { .cmd = OVS_VPORT_CMD_NEW,
1862          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1863          .policy = vport_policy,
1864          .doit = ovs_vport_cmd_new
1865        },
1866        { .cmd = OVS_VPORT_CMD_DEL,
1867          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1868          .policy = vport_policy,
1869          .doit = ovs_vport_cmd_del
1870        },
1871        { .cmd = OVS_VPORT_CMD_GET,
1872          .flags = 0,               /* OK for unprivileged users. */
1873          .policy = vport_policy,
1874          .doit = ovs_vport_cmd_get,
1875          .dumpit = ovs_vport_cmd_dump
1876        },
1877        { .cmd = OVS_VPORT_CMD_SET,
1878          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1879          .policy = vport_policy,
1880          .doit = ovs_vport_cmd_set,
1881        },
1882};
1883
1884struct genl_family_and_ops {
1885        struct genl_family *family;
1886        struct genl_ops *ops;
1887        int n_ops;
1888        struct genl_multicast_group *group;
1889};
1890
1891static const struct genl_family_and_ops dp_genl_families[] = {
1892        { &dp_datapath_genl_family,
1893          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1894          &ovs_dp_datapath_multicast_group },
1895        { &dp_vport_genl_family,
1896          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1897          &ovs_dp_vport_multicast_group },
1898        { &dp_flow_genl_family,
1899          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1900          &ovs_dp_flow_multicast_group },
1901        { &dp_packet_genl_family,
1902          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1903          NULL },
1904};
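
/*
 * Registration is table-driven: each entry pairs a family with its ops
 * and an optional multicast group (the packet family sends no
 * notifications, hence its NULL group).
 */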
1905
1906static void dp_unregister_genl(int n_families)
1907{
1908        int i;
1909
1910        for (i = 0; i < n_families; i++)
1911                genl_unregister_family(dp_genl_families[i].family);
1912}
1913
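/*
 * On error, only the families registered so far are unwound; a multicast
 * group is presumably dropped along with its family when
 * genl_unregister_family() runs.
 */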
1914static int dp_register_genl(void)
1915{
1916        int n_registered;
1917        int err;
1918        int i;
1919
1920        n_registered = 0;
1921        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1922                const struct genl_family_and_ops *f = &dp_genl_families[i];
1923
1924                err = genl_register_family_with_ops(f->family, f->ops,
1925                                                    f->n_ops);
1926                if (err)
1927                        goto error;
1928                n_registered++;
1929
1930                if (f->group) {
1931                        err = genl_register_mc_group(f->family, f->group);
1932                        if (err)
1933                                goto error;
1934                }
1935        }
1936
1937        return 0;
1938
1939error:
1940        dp_unregister_genl(n_registered);
1941        return err;
1942}
1943
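/*
 * Periodic work: rehash every datapath's flow table so that worst-case
 * hash collision chains cannot persist indefinitely.  The new table is
 * published with rcu_assign_pointer(), so readers see either the old or
 * the new table, and the old one is freed only after an RCU grace period.
 * genl_mutex protects the dp->table update and the per-namespace dps
 * lists; RTNL additionally stabilizes the namespace list walked by
 * for_each_net().
 */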
1944static void rehash_flow_table(struct work_struct *work)
1945{
1946        struct datapath *dp;
1947        struct net *net;
1948
1949        genl_lock();
1950        rtnl_lock();
1951        for_each_net(net) {
1952                struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1953
1954                list_for_each_entry(dp, &ovs_net->dps, list_node) {
1955                        struct flow_table *old_table = genl_dereference(dp->table);
1956                        struct flow_table *new_table;
1957
1958                        new_table = ovs_flow_tbl_rehash(old_table);
1959                        if (!IS_ERR(new_table)) {
1960                                rcu_assign_pointer(dp->table, new_table);
1961                                ovs_flow_tbl_deferred_destroy(old_table);
1962                        }
1963                }
1964        }
1965        rtnl_unlock();
1966        genl_unlock();
1967
1968        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1969}
1970
1971static int __net_init ovs_init_net(struct net *net)
1972{
1973        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1974
1975        INIT_LIST_HEAD(&ovs_net->dps);
1976        return 0;
1977}
1978
1979static void __net_exit ovs_exit_net(struct net *net)
1980{
1981        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1982        struct datapath *dp, *dp_next;
1983
1984        genl_lock();
1985        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1986                __dp_destroy(dp);
1987        genl_unlock();
1988}
1989
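/*
 * With .id and .size set, the pernet core allocates and zeroes one
 * struct ovs_net per network namespace and exposes it through
 * net_generic(net, ovs_net_id), so ovs_init_net() only needs to
 * initialize the datapath list.
 */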
1990static struct pernet_operations ovs_net_ops = {
1991        .init = ovs_init_net,
1992        .exit = ovs_exit_net,
1993        .id   = &ovs_net_id,
1994        .size = sizeof(struct ovs_net),
1995};
1996
1997static int __init dp_init(void)
1998{
1999        int err;
2000
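        /*
         * struct ovs_skb_cb lives in the socket buffer's control block;
         * fail the build if it ever outgrows the space available there.
         */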
2001        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2002
2003        pr_info("Open vSwitch switching datapath\n");
2004
2005        err = ovs_flow_init();
2006        if (err)
2007                goto error;
2008
2009        err = ovs_vport_init();
2010        if (err)
2011                goto error_flow_exit;
2012
2013        err = register_pernet_device(&ovs_net_ops);
2014        if (err)
2015                goto error_vport_exit;
2016
2017        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2018        if (err)
2019                goto error_netns_exit;
2020
2021        err = dp_register_genl();
2022        if (err < 0)
2023                goto error_unreg_notifier;
2024
2025        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2026
2027        return 0;
2028
2029error_unreg_notifier:
2030        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2031error_netns_exit:
2032        unregister_pernet_device(&ovs_net_ops);
2033error_vport_exit:
2034        ovs_vport_exit();
2035error_flow_exit:
2036        ovs_flow_exit();
2037error:
2038        return err;
2039}
2040
2041static void dp_cleanup(void)
2042{
2043        cancel_delayed_work_sync(&rehash_flow_wq);
2044        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2045        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2046        unregister_pernet_device(&ovs_net_ops);
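        /*
         * All deferred destruction in this module goes through call_rcu();
         * rcu_barrier() waits for those callbacks to run before
         * ovs_flow_exit() destroys the caches they still reference.
         */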
2047        rcu_barrier();
2048        ovs_vport_exit();
2049        ovs_flow_exit();
2050}
2051
2052module_init(dp_init);
2053module_exit(dp_cleanup);
2054
2055MODULE_DESCRIPTION("Open vSwitch switching datapath");
2056MODULE_LICENSE("GPL");
2057