linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"


#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

int ovs_net_id __read_mostly;

static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
                       struct genl_multicast_group *grp)
{
        genl_notify(skb, genl_info_net(info), info->snd_portid,
                    grp->id, info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif
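
/*
 * Usage sketch (illustrative; example_install_table() is hypothetical and
 * not part of this file): a writer takes ovs_lock() around the mutation and
 * publishes new pointers with rcu_assign_pointer(), while readers rely on
 * RCU alone:
 *
 *	static void example_install_table(struct datapath *dp,
 *					  struct flow_table *new_table)
 *	{
 *		ovs_lock();
 *		rcu_assign_pointer(dp->table, new_table);
 *		ovs_unlock();
 *	}
 *
 *	Read side:
 *		rcu_read_lock();
 *		table = rcu_dereference(dp->table);
 *		...
 *		rcu_read_unlock();
 */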

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct net *, int dp_ifindex,
                                  struct sk_buff *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, dp_ifindex);
        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = netdev_vport_priv(local)->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}
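
/*
 * Usage sketch (illustrative, not from this file): vports hash into
 * dp->ports by the low bits of the port number, so e.g. if
 * DP_VPORT_HASH_BUCKETS is 1024, port 5 lands in bucket 5 & 1023 == 5.
 * Callers look ports up under RCU:
 *
 *	rcu_read_lock();
 *	vport = ovs_lookup_vport(dp, port_no);
 *	if (vport)
 *		...
 *	rcu_read_unlock();
 */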

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
        struct sw_flow_key key;
        u64 *stats_counter;
        int error;
        int key_len;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Extract flow from 'skb' into 'key'. */
        error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
        }

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.key = &key;
                upcall.userdata = NULL;
                upcall.portid = p->upcall_portid;
                ovs_dp_upcall(dp, skb, &upcall);
                consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        OVS_CB(skb)->flow = flow;

        stats_counter = &stats->n_hit;
        ovs_flow_used(OVS_CB(skb)->flow, skb);
        ovs_execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->sync);
        (*stats_counter)++;
        u64_stats_update_end(&stats->sync);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
};

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int dp_ifindex;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex) {
                err = -ENODEV;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        else
                err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->sync);
        stats->n_lost++;
        u64_stats_update_end(&stats->sync);

        return err;
}

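/* Software-segments a GSO skb and queues each resulting segment to
 * userspace as a separate upcall.  For UDP, the flow key extracted from the
 * original skb describes the first fragment, so the key attached to the
 * second and later segments is rewritten with OVS_FRAG_TYPE_LATER before
 * they are queued.
 */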
static int queue_gso_packets(struct net *net, int dp_ifindex,
                             struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
                if (err)
                        break;

                if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for the first fragment, so we need
                         * to mark later fragments properly.
                         */
                        later_key = *upcall_info->key;
                        later_key.ip.frag = OVS_FRAG_TYPE_LATER;

                        later_info = *upcall_info;
                        later_info.key = &later_key;
                        upcall_info = &later_info;
                }
        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static size_t key_attr_size(void)
{
        return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
                + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
                  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
                  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
                  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
                + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
                + nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static size_t upcall_msg_size(const struct sk_buff *skb,
                              const struct nlattr *userdata)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (userdata)
                size += NLA_ALIGN(userdata->nla_len);

        return size;
}
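
/*
 * Worked example (illustrative; assumes NLA_HDRLEN == 4 and 4-byte
 * attribute alignment, so nla_total_size(n) == NLA_ALIGN(4 + n)):
 * key_attr_size() above sums to 208 bytes, and a 64-byte packet with no
 * userdata yields
 *
 *	NLMSG_ALIGN(sizeof(struct ovs_header))             4
 *	+ nla_total_size(64)   OVS_PACKET_ATTR_PACKET     68
 *	+ nla_total_size(208)  OVS_PACKET_ATTR_KEY       212
 *	= 284 bytes for the upcall message.
 */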
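/* Builds an OVS_PACKET genetlink message carrying the flow key, any
 * userdata, and the raw packet bytes, then unicasts it to the Netlink port
 * in upcall_info->portid.  A VLAN-accelerated skb is cloned and its tag
 * pushed back into the payload first, since the message must carry the
 * packet as it would appear on the wire.
 */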
static int queue_userspace_packet(struct net *net, int dp_ifindex,
                                  struct sk_buff *skb,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb; /* to be queued to userspace */
        struct nlattr *nla;
        int err;

        if (vlan_tx_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                nskb->vlan_tci = 0;
                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
        ovs_flow_to_nlattrs(upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

        skb_copy_and_csum_dev(skb, nla_data(nla));

        genlmsg_end(user_skb, upcall);
        err = genlmsg_unicast(net, user_skb, upcall_info->portid);

out:
        kfree_skb(nskb);
        return err;
}

/* Called with ovs_mutex. */
static int flush_flows(struct datapath *dp)
{
        struct flow_table *old_table;
        struct flow_table *new_table;

        old_table = ovsl_dereference(dp->table);
        new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        ovs_flow_tbl_deferred_destroy(old_table);
        return 0;
}

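/* Makes room for attr_len more bytes of actions in *sfa, doubling the
 * allocation (capped at MAX_ACTIONS_BUFSIZE) and copying the existing
 * actions over when the current buffer is too small.  Returns a pointer to
 * the newly reserved space, or an ERR_PTR on failure.
 */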
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
{
        struct sw_flow_actions *acts;
        int new_acts_size;
        int req_size = NLA_ALIGN(attr_len);
        int next_offset = offsetof(struct sw_flow_actions, actions) +
                                        (*sfa)->actions_len;

        if (req_size <= (ksize(*sfa) - next_offset))
                goto out;

        new_acts_size = ksize(*sfa) * 2;

        if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
                if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
                        return ERR_PTR(-EMSGSIZE);
                new_acts_size = MAX_ACTIONS_BUFSIZE;
        }

        acts = ovs_flow_actions_alloc(new_acts_size);
        if (IS_ERR(acts))
                return (void *)acts;

        memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
        acts->actions_len = (*sfa)->actions_len;
        kfree(*sfa);
        *sfa = acts;

out:
        (*sfa)->actions_len += req_size;
        return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}

static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
{
        struct nlattr *a;

        a = reserve_sfa_size(sfa, nla_attr_size(len));
        if (IS_ERR(a))
                return PTR_ERR(a);

        a->nla_type = attrtype;
        a->nla_len = nla_attr_size(len);

        if (data)
                memcpy(nla_data(a), data, len);
        memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

        return 0;
}

static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
{
        int used = (*sfa)->actions_len;
        int err;

        err = add_action(sfa, attrtype, NULL, 0);
        if (err)
                return err;

        return used;
}

static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
{
        struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);

        a->nla_len = sfa->actions_len - st_offset;
}
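
/*
 * Usage sketch (illustrative, error handling omitted): together these
 * helpers serialize a nested attribute incrementally, the way
 * validate_and_copy_sample() below emits OVS_ACTION_ATTR_SAMPLE:
 *
 *	struct sw_flow_actions *sfa = ...;
 *	int start, st_acts;
 *
 *	start = add_nested_action_start(&sfa, OVS_ACTION_ATTR_SAMPLE);
 *	add_action(&sfa, OVS_SAMPLE_ATTR_PROBABILITY, &prob, sizeof(u32));
 *	st_acts = add_nested_action_start(&sfa, OVS_SAMPLE_ATTR_ACTIONS);
 *	...append the nested actions...
 *	add_nested_action_end(sfa, st_acts);	(patches the inner nla_len)
 *	add_nested_action_end(sfa, start);	(patches the outer nla_len)
 */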

static int validate_and_copy_actions(const struct nlattr *attr,
                                     const struct sw_flow_key *key, int depth,
                                     struct sw_flow_actions **sfa);

static int validate_and_copy_sample(const struct nlattr *attr,
                                    const struct sw_flow_key *key, int depth,
                                    struct sw_flow_actions **sfa)
{
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
        const struct nlattr *a;
        int rem, start, err, st_acts;

        memset(attrs, 0, sizeof(attrs));
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
                        return -EINVAL;
                attrs[type] = a;
        }
        if (rem)
                return -EINVAL;

        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
        if (!probability || nla_len(probability) != sizeof(u32))
                return -EINVAL;

        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
                return -EINVAL;

        /* validation done, copy sample action. */
        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
        if (start < 0)
                return start;
        err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
        if (err)
                return err;
        st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
        if (st_acts < 0)
                return st_acts;

        err = validate_and_copy_actions(actions, key, depth + 1, sfa);
        if (err)
                return err;

        add_nested_action_end(*sfa, st_acts);
        add_nested_action_end(*sfa, start);

        return 0;
}

static int validate_tp_port(const struct sw_flow_key *flow_key)
{
        if (flow_key->eth.type == htons(ETH_P_IP)) {
                if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
                        return 0;
        } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
                if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
                        return 0;
        }

        return -EINVAL;
}

static int validate_and_copy_set_tun(const struct nlattr *attr,
                                     struct sw_flow_actions **sfa)
{
        struct ovs_key_ipv4_tunnel tun_key;
        int err, start;

        err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
        if (err)
                return err;

        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
        if (start < 0)
                return start;

        err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
        add_nested_action_end(*sfa, start);

        return err;
}

static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key,
                        struct sw_flow_actions **sfa,
                        bool *set_tun)
{
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);

        /* There can be only one key in an action */
        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
                return -EINVAL;

        if (key_type > OVS_KEY_ATTR_MAX ||
           (ovs_key_lens[key_type] != nla_len(ovs_key) &&
            ovs_key_lens[key_type] != -1))
                return -EINVAL;

        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;
        const struct ovs_key_ipv6 *ipv6_key;
        int err;

        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_SKB_MARK:
        case OVS_KEY_ATTR_ETHERNET:
                break;

        case OVS_KEY_ATTR_TUNNEL:
                *set_tun = true;
                err = validate_and_copy_set_tun(a, sfa);
                if (err)
                        return err;
                break;

        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv4_key = nla_data(ovs_key);
                if (ipv4_key->ipv4_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv4_key->ipv4_frag != flow_key->ip.frag)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_IPV6:
                if (flow_key->eth.type != htons(ETH_P_IPV6))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv6_key = nla_data(ovs_key);
                if (ipv6_key->ipv6_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv6_key->ipv6_frag != flow_key->ip.frag)
                        return -EINVAL;

                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_TCP:
                if (flow_key->ip.proto != IPPROTO_TCP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        default:
                return -EINVAL;
        }

        return 0;
}

static int validate_userspace(const struct nlattr *attr)
{
        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
        };
        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
        int error;

        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
                                 attr, userspace_policy);
        if (error)
                return error;

        if (!a[OVS_USERSPACE_ATTR_PID] ||
            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
                return -EINVAL;

        return 0;
}

static int copy_action(const struct nlattr *from,
                       struct sw_flow_actions **sfa)
{
        int totlen = NLA_ALIGN(from->nla_len);
        struct nlattr *to;

        to = reserve_sfa_size(sfa, from->nla_len);
        if (IS_ERR(to))
                return PTR_ERR(to);

        memcpy(to, from, totlen);
        return 0;
}

static int validate_and_copy_actions(const struct nlattr *attr,
                                     const struct sw_flow_key *key,
                                     int depth,
                                     struct sw_flow_actions **sfa)
{
        const struct nlattr *a;
        int rem, err;

        if (depth >= SAMPLE_ACTION_DEPTH)
                return -EOVERFLOW;

        nla_for_each_nested(a, attr, rem) {
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);
                bool skip_copy;

                if (type > OVS_ACTION_ATTR_MAX ||
                    (action_lens[type] != nla_len(a) &&
                     action_lens[type] != (u32)-1))
                        return -EINVAL;

                skip_copy = false;
                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case OVS_ACTION_ATTR_USERSPACE:
                        err = validate_userspace(a);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        vlan = nla_data(a);
                        if (vlan->vlan_tpid != htons(ETH_P_8021Q))
                                return -EINVAL;
                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key, sfa, &skip_copy);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = validate_and_copy_sample(a, key, depth, sfa);
                        if (err)
                                return err;
                        skip_copy = true;
                        break;

                default:
                        return -EINVAL;
                }
                if (!skip_copy) {
                        err = copy_action(a, sfa);
                        if (err)
                                return err;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

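/* OVS_PACKET_CMD_EXECUTE handler: rebuilds an skb from
 * OVS_PACKET_ATTR_PACKET, constructs a temporary flow from the packet and
 * the metadata in OVS_PACKET_ATTR_KEY, validates and attaches the supplied
 * actions, and executes them as if the packet had been received on the
 * datapath.
 */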
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        int len;
        int err;
        int key_len;

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
        if (err)
                goto err_flow_free;

        err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;
        acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;

        err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
        rcu_assign_pointer(flow->sf_acts, acts);
        if (err)
                goto err_flow_free;

        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        local_bh_disable();
        err = ovs_execute_actions(dp, packet);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

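/* Sums the per-CPU hit/miss/lost counters into 'stats', re-reading each
 * CPU's snapshot via its u64_stats sequence counter until a consistent
 * value is seen (needed for 64-bit counters on 32-bit systems).
 */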
 952static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 953{
 954        int i;
 955        struct flow_table *table = ovsl_dereference(dp->table);
 956
 957        stats->n_flows = ovs_flow_tbl_count(table);
 958
 959        stats->n_hit = stats->n_missed = stats->n_lost = 0;
 960        for_each_possible_cpu(i) {
 961                const struct dp_stats_percpu *percpu_stats;
 962                struct dp_stats_percpu local_stats;
 963                unsigned int start;
 964
 965                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
 966
 967                do {
 968                        start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
 969                        local_stats = *percpu_stats;
 970                } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
 971
 972                stats->n_hit += local_stats.n_hit;
 973                stats->n_missed += local_stats.n_missed;
 974                stats->n_lost += local_stats.n_lost;
 975        }
 976}
 977
 978static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
 979        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
 980        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
 981        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
 982};
 983
 984static struct genl_family dp_flow_genl_family = {
 985        .id = GENL_ID_GENERATE,
 986        .hdrsize = sizeof(struct ovs_header),
 987        .name = OVS_FLOW_FAMILY,
 988        .version = OVS_FLOW_VERSION,
 989        .maxattr = OVS_FLOW_ATTR_MAX,
 990        .netnsok = true,
 991        .parallel_ops = true,
 992};
 993
 994static struct genl_multicast_group ovs_dp_flow_multicast_group = {
 995        .name = OVS_FLOW_MCGROUP
 996};
 997
 998static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
 999static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1000{
1001        const struct nlattr *a;
1002        struct nlattr *start;
1003        int err = 0, rem;
1004
1005        start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1006        if (!start)
1007                return -EMSGSIZE;
1008
1009        nla_for_each_nested(a, attr, rem) {
1010                int type = nla_type(a);
1011                struct nlattr *st_sample;
1012
1013                switch (type) {
1014                case OVS_SAMPLE_ATTR_PROBABILITY:
1015                        if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
1016                                return -EMSGSIZE;
1017                        break;
1018                case OVS_SAMPLE_ATTR_ACTIONS:
1019                        st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1020                        if (!st_sample)
1021                                return -EMSGSIZE;
1022                        err = actions_to_attr(nla_data(a), nla_len(a), skb);
1023                        if (err)
1024                                return err;
1025                        nla_nest_end(skb, st_sample);
1026                        break;
1027                }
1028        }
1029
1030        nla_nest_end(skb, start);
1031        return err;
1032}
1033
1034static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1035{
1036        const struct nlattr *ovs_key = nla_data(a);
1037        int key_type = nla_type(ovs_key);
1038        struct nlattr *start;
1039        int err;
1040
1041        switch (key_type) {
1042        case OVS_KEY_ATTR_IPV4_TUNNEL:
1043                start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1044                if (!start)
1045                        return -EMSGSIZE;
1046
1047                err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
1048                if (err)
1049                        return err;
1050                nla_nest_end(skb, start);
1051                break;
1052        default:
1053                if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1054                        return -EMSGSIZE;
1055                break;
1056        }
1057
1058        return 0;
1059}
1060
1061static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
1062{
1063        const struct nlattr *a;
1064        int rem, err;
1065
1066        nla_for_each_attr(a, attr, len, rem) {
1067                int type = nla_type(a);
1068
1069                switch (type) {
1070                case OVS_ACTION_ATTR_SET:
1071                        err = set_action_to_attr(a, skb);
1072                        if (err)
1073                                return err;
1074                        break;
1075
1076                case OVS_ACTION_ATTR_SAMPLE:
1077                        err = sample_action_to_attr(a, skb);
1078                        if (err)
1079                                return err;
1080                        break;
1081                default:
1082                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
1083                                return -EMSGSIZE;
1084                        break;
1085                }
1086        }
1087
1088        return 0;
1089}
1090
1091static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
1092{
1093        return NLMSG_ALIGN(sizeof(struct ovs_header))
1094                + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
1095                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
1096                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
1097                + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
1098                + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
1099}
1100
1101/* Called with ovs_mutex. */
1102static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
1103                                  struct sk_buff *skb, u32 portid,
1104                                  u32 seq, u32 flags, u8 cmd)
1105{
1106        const int skb_orig_len = skb->len;
1107        const struct sw_flow_actions *sf_acts;
1108        struct nlattr *start;
1109        struct ovs_flow_stats stats;
1110        struct ovs_header *ovs_header;
1111        struct nlattr *nla;
1112        unsigned long used;
1113        u8 tcp_flags;
1114        int err;
1115
1116        sf_acts = ovsl_dereference(flow->sf_acts);
1117
1118        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
1119        if (!ovs_header)
1120                return -EMSGSIZE;
1121
1122        ovs_header->dp_ifindex = get_dpifindex(dp);
1123
1124        nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
1125        if (!nla)
1126                goto nla_put_failure;
1127        err = ovs_flow_to_nlattrs(&flow->key, skb);
1128        if (err)
1129                goto error;
1130        nla_nest_end(skb, nla);
1131
1132        spin_lock_bh(&flow->lock);
1133        used = flow->used;
1134        stats.n_packets = flow->packet_count;
1135        stats.n_bytes = flow->byte_count;
1136        tcp_flags = flow->tcp_flags;
1137        spin_unlock_bh(&flow->lock);
1138
1139        if (used &&
1140            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
1141                goto nla_put_failure;
1142
1143        if (stats.n_packets &&
1144            nla_put(skb, OVS_FLOW_ATTR_STATS,
1145                    sizeof(struct ovs_flow_stats), &stats))
1146                goto nla_put_failure;
1147
1148        if (tcp_flags &&
1149            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
1150                goto nla_put_failure;
1151
1152        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
1153         * this is the first flow to be dumped into 'skb'.  This is unusual for
1154         * Netlink but individual action lists can be longer than
1155         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
1156         * The userspace caller can always fetch the actions separately if it
1157         * really wants them.  (Most userspace callers in fact don't care.)
1158         *
1159         * This can only fail for dump operations because the skb is always
1160         * properly sized for single flows.
1161         */
1162        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
1163        if (start) {
1164                err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
1165                if (!err)
1166                        nla_nest_end(skb, start);
1167                else {
1168                        if (skb_orig_len)
1169                                goto error;
1170
1171                        nla_nest_cancel(skb, start);
1172                }
1173        } else if (skb_orig_len)
1174                goto nla_put_failure;
1175
1176        return genlmsg_end(skb, ovs_header);
1177
1178nla_put_failure:
1179        err = -EMSGSIZE;
1180error:
1181        genlmsg_cancel(skb, ovs_header);
1182        return err;
1183}
1184
1185static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
1186{
1187        const struct sw_flow_actions *sf_acts;
1188
1189        sf_acts = ovsl_dereference(flow->sf_acts);
1190
1191        return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
1192}
1193
1194static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
1195                                               struct datapath *dp,
1196                                               u32 portid, u32 seq, u8 cmd)
1197{
1198        struct sk_buff *skb;
1199        int retval;
1200
1201        skb = ovs_flow_cmd_alloc_info(flow);
1202        if (!skb)
1203                return ERR_PTR(-ENOMEM);
1204
1205        retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
1206        BUG_ON(retval < 0);
1207        return skb;
1208}
1209
1210static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1211{
1212        struct nlattr **a = info->attrs;
1213        struct ovs_header *ovs_header = info->userhdr;
1214        struct sw_flow_key key;
1215        struct sw_flow *flow;
1216        struct sk_buff *reply;
1217        struct datapath *dp;
1218        struct flow_table *table;
1219        struct sw_flow_actions *acts = NULL;
1220        int error;
1221        int key_len;
1222
1223        /* Extract key. */
1224        error = -EINVAL;
1225        if (!a[OVS_FLOW_ATTR_KEY])
1226                goto error;
1227        error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1228        if (error)
1229                goto error;
1230
1231        /* Validate actions. */
1232        if (a[OVS_FLOW_ATTR_ACTIONS]) {
1233                acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
1234                error = PTR_ERR(acts);
1235                if (IS_ERR(acts))
1236                        goto error;
1237
1238                error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0, &acts);
1239                if (error)
1240                        goto err_kfree;
1241        } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
1242                error = -EINVAL;
1243                goto error;
1244        }
1245
1246        ovs_lock();
1247        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1248        error = -ENODEV;
1249        if (!dp)
1250                goto err_unlock_ovs;
1251
1252        table = ovsl_dereference(dp->table);
1253        flow = ovs_flow_tbl_lookup(table, &key, key_len);
1254        if (!flow) {
1255                /* Bail out if we're not allowed to create a new flow. */
1256                error = -ENOENT;
1257                if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
1258                        goto err_unlock_ovs;
1259
1260                /* Expand table, if necessary, to make room. */
1261                if (ovs_flow_tbl_need_to_expand(table)) {
1262                        struct flow_table *new_table;
1263
1264                        new_table = ovs_flow_tbl_expand(table);
1265                        if (!IS_ERR(new_table)) {
1266                                rcu_assign_pointer(dp->table, new_table);
1267                                ovs_flow_tbl_deferred_destroy(table);
1268                                table = ovsl_dereference(dp->table);
1269                        }
1270                }
1271
1272                /* Allocate flow. */
1273                flow = ovs_flow_alloc();
1274                if (IS_ERR(flow)) {
1275                        error = PTR_ERR(flow);
1276                        goto err_unlock_ovs;
1277                }
1278                clear_stats(flow);
1279
1280                rcu_assign_pointer(flow->sf_acts, acts);
1281
1282                /* Put flow in bucket. */
1283                ovs_flow_tbl_insert(table, flow, &key, key_len);
1284
1285                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1286                                                info->snd_seq,
1287                                                OVS_FLOW_CMD_NEW);
1288        } else {
1289                /* We found a matching flow. */
1290                struct sw_flow_actions *old_acts;
1291
1292                /* Bail out if we're not allowed to modify an existing flow.
1293                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1294                 * because Generic Netlink treats the latter as a dump
1295                 * request.  We also accept NLM_F_EXCL in case that bug ever
1296                 * gets fixed.
1297                 */
1298                error = -EEXIST;
1299                if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1300                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1301                        goto err_unlock_ovs;
1302
1303                /* Update actions. */
1304                old_acts = ovsl_dereference(flow->sf_acts);
1305                rcu_assign_pointer(flow->sf_acts, acts);
1306                ovs_flow_deferred_free_acts(old_acts);
1307
1308                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1309                                               info->snd_seq, OVS_FLOW_CMD_NEW);
1310
1311                /* Clear stats. */
1312                if (a[OVS_FLOW_ATTR_CLEAR]) {
1313                        spin_lock_bh(&flow->lock);
1314                        clear_stats(flow);
1315                        spin_unlock_bh(&flow->lock);
1316                }
1317        }
1318        ovs_unlock();
1319
1320        if (!IS_ERR(reply))
1321                ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1322        else
1323                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1324                                ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1325        return 0;
1326
1327err_unlock_ovs:
1328        ovs_unlock();
1329err_kfree:
1330        kfree(acts);
1331error:
1332        return error;
1333}
1334
1335static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1336{
1337        struct nlattr **a = info->attrs;
1338        struct ovs_header *ovs_header = info->userhdr;
1339        struct sw_flow_key key;
1340        struct sk_buff *reply;
1341        struct sw_flow *flow;
1342        struct datapath *dp;
1343        struct flow_table *table;
1344        int err;
1345        int key_len;
1346
1347        if (!a[OVS_FLOW_ATTR_KEY])
1348                return -EINVAL;
1349        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1350        if (err)
1351                return err;
1352
1353        ovs_lock();
1354        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1355        if (!dp) {
1356                err = -ENODEV;
1357                goto unlock;
1358        }
1359
1360        table = ovsl_dereference(dp->table);
1361        flow = ovs_flow_tbl_lookup(table, &key, key_len);
1362        if (!flow) {
1363                err = -ENOENT;
1364                goto unlock;
1365        }
1366
1367        reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1368                                        info->snd_seq, OVS_FLOW_CMD_NEW);
1369        if (IS_ERR(reply)) {
1370                err = PTR_ERR(reply);
1371                goto unlock;
1372        }
1373
1374        ovs_unlock();
1375        return genlmsg_reply(reply, info);
1376unlock:
1377        ovs_unlock();
1378        return err;
1379}
1380
1381static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1382{
1383        struct nlattr **a = info->attrs;
1384        struct ovs_header *ovs_header = info->userhdr;
1385        struct sw_flow_key key;
1386        struct sk_buff *reply;
1387        struct sw_flow *flow;
1388        struct datapath *dp;
1389        struct flow_table *table;
1390        int err;
1391        int key_len;
1392
1393        ovs_lock();
1394        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1395        if (!dp) {
1396                err = -ENODEV;
1397                goto unlock;
1398        }
1399
1400        if (!a[OVS_FLOW_ATTR_KEY]) {
1401                err = flush_flows(dp);
1402                goto unlock;
1403        }
1404        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1405        if (err)
1406                goto unlock;
1407
1408        table = ovsl_dereference(dp->table);
1409        flow = ovs_flow_tbl_lookup(table, &key, key_len);
1410        if (!flow) {
1411                err = -ENOENT;
1412                goto unlock;
1413        }
1414
1415        reply = ovs_flow_cmd_alloc_info(flow);
1416        if (!reply) {
1417                err = -ENOMEM;
1418                goto unlock;
1419        }
1420
1421        ovs_flow_tbl_remove(table, flow);
1422
1423        err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1424                                     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1425        BUG_ON(err < 0);
1426
1427        ovs_flow_deferred_free(flow);
1428        ovs_unlock();
1429
1430        ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1431        return 0;
1432unlock:
1433        ovs_unlock();
1434        return err;
1435}
1436
1437static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1438{
1439        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1440        struct datapath *dp;
1441        struct flow_table *table;
1442
1443        ovs_lock();
1444        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1445        if (!dp) {
1446                ovs_unlock();
1447                return -ENODEV;
1448        }
1449
1450        table = ovsl_dereference(dp->table);
1451
1452        for (;;) {
1453                struct sw_flow *flow;
1454                u32 bucket, obj;
1455
1456                bucket = cb->args[0];
1457                obj = cb->args[1];
1458                flow = ovs_flow_tbl_next(table, &bucket, &obj);
1459                if (!flow)
1460                        break;
1461
1462                if (ovs_flow_cmd_fill_info(flow, dp, skb,
1463                                           NETLINK_CB(cb->skb).portid,
1464                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
1465                                           OVS_FLOW_CMD_NEW) < 0)
1466                        break;
1467
1468                cb->args[0] = bucket;
1469                cb->args[1] = obj;
1470        }
1471        ovs_unlock();
1472        return skb->len;
1473}
1474
1475static struct genl_ops dp_flow_genl_ops[] = {
1476        { .cmd = OVS_FLOW_CMD_NEW,
1477          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1478          .policy = flow_policy,
1479          .doit = ovs_flow_cmd_new_or_set
1480        },
1481        { .cmd = OVS_FLOW_CMD_DEL,
1482          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1483          .policy = flow_policy,
1484          .doit = ovs_flow_cmd_del
1485        },
1486        { .cmd = OVS_FLOW_CMD_GET,
1487          .flags = 0,               /* OK for unprivileged users. */
1488          .policy = flow_policy,
1489          .doit = ovs_flow_cmd_get,
1490          .dumpit = ovs_flow_cmd_dump
1491        },
1492        { .cmd = OVS_FLOW_CMD_SET,
1493          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1494          .policy = flow_policy,
1495          .doit = ovs_flow_cmd_new_or_set,
1496        },
1497};
1498
1499static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1500        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1501        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1502};
1503
1504static struct genl_family dp_datapath_genl_family = {
1505        .id = GENL_ID_GENERATE,
1506        .hdrsize = sizeof(struct ovs_header),
1507        .name = OVS_DATAPATH_FAMILY,
1508        .version = OVS_DATAPATH_VERSION,
1509        .maxattr = OVS_DP_ATTR_MAX,
1510        .netnsok = true,
1511        .parallel_ops = true,
1512};
1513
1514static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1515        .name = OVS_DATAPATH_MCGROUP
1516};
1517
1518static size_t ovs_dp_cmd_msg_size(void)
1519{
1520        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1521
1522        msgsize += nla_total_size(IFNAMSIZ);
1523        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1524
1525        return msgsize;
1526}
1527
1528static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1529                                u32 portid, u32 seq, u32 flags, u8 cmd)
1530{
1531        struct ovs_header *ovs_header;
1532        struct ovs_dp_stats dp_stats;
1533        int err;
1534
1535        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1536                                   flags, cmd);
1537        if (!ovs_header)
1538                goto error;
1539
1540        ovs_header->dp_ifindex = get_dpifindex(dp);
1541
1542        rcu_read_lock();
1543        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1544        rcu_read_unlock();
1545        if (err)
1546                goto nla_put_failure;
1547
1548        get_dp_stats(dp, &dp_stats);
1549        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
1550                goto nla_put_failure;
1551
1552        return genlmsg_end(skb, ovs_header);
1553
1554nla_put_failure:
1555        genlmsg_cancel(skb, ovs_header);
1556error:
1557        return -EMSGSIZE;
1558}
1559
1560static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1561                                             u32 seq, u8 cmd)
1562{
1563        struct sk_buff *skb;
1564        int retval;
1565
1566        skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1567        if (!skb)
1568                return ERR_PTR(-ENOMEM);
1569
1570        retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
1571        if (retval < 0) {
1572                kfree_skb(skb);
1573                return ERR_PTR(retval);
1574        }
1575        return skb;
1576}
1577
1578/* Called with ovs_mutex. */
1579static struct datapath *lookup_datapath(struct net *net,
1580                                        struct ovs_header *ovs_header,
1581                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1582{
1583        struct datapath *dp;
1584
1585        if (!a[OVS_DP_ATTR_NAME])
1586                dp = get_dp(net, ovs_header->dp_ifindex);
1587        else {
1588                struct vport *vport;
1589
1590                rcu_read_lock();
1591                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1592                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1593                rcu_read_unlock();
1594        }
1595        return dp ? dp : ERR_PTR(-ENODEV);
1596}
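/* A datapath can be addressed two ways: by the ifindex of its local
 * port (ovs_header->dp_ifindex) or, when OVS_DP_ATTR_NAME is present,
 * by name. The name lookup goes through the vport name table and only
 * matches if the vport found is a local port (OVSP_LOCAL), i.e. the
 * internal device that represents the datapath itself; a request
 * naming an ordinary port deliberately fails with -ENODEV.
 */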
1597
1598static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1599{
1600        struct nlattr **a = info->attrs;
1601        struct vport_parms parms;
1602        struct sk_buff *reply;
1603        struct datapath *dp;
1604        struct vport *vport;
1605        struct ovs_net *ovs_net;
1606        int err, i;
1607
1608        err = -EINVAL;
1609        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1610                goto err;
1611
1612        ovs_lock();
1613
1614        err = -ENOMEM;
1615        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1616        if (dp == NULL)
1617                goto err_unlock_ovs;
1618
1619        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1620
1621        /* Allocate the flow table. */
1622        err = -ENOMEM;
1623        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
1624        if (!dp->table)
1625                goto err_free_dp;
1626
1627        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1628        if (!dp->stats_percpu) {
1629                err = -ENOMEM;
1630                goto err_destroy_table;
1631        }
1632
1633        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1634                        GFP_KERNEL);
1635        if (!dp->ports) {
1636                err = -ENOMEM;
1637                goto err_destroy_percpu;
1638        }
1639
1640        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1641                INIT_HLIST_HEAD(&dp->ports[i]);
1642
1643        /* Set up our datapath device. */
1644        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1645        parms.type = OVS_VPORT_TYPE_INTERNAL;
1646        parms.options = NULL;
1647        parms.dp = dp;
1648        parms.port_no = OVSP_LOCAL;
1649        parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1650
1651        vport = new_vport(&parms);
1652        if (IS_ERR(vport)) {
1653                err = PTR_ERR(vport);
1654                if (err == -EBUSY)
1655                        err = -EEXIST;
1656
1657                goto err_destroy_ports_array;
1658        }
1659
1660        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1661                                      info->snd_seq, OVS_DP_CMD_NEW);
1662        err = PTR_ERR(reply);
1663        if (IS_ERR(reply))
1664                goto err_destroy_local_port;
1665
1666        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1667        list_add_tail(&dp->list_node, &ovs_net->dps);
1668
1669        ovs_unlock();
1670
1671        ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1672        return 0;
1673
1674err_destroy_local_port:
1675        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1676err_destroy_ports_array:
1677        kfree(dp->ports);
1678err_destroy_percpu:
1679        free_percpu(dp->stats_percpu);
1680err_destroy_table:
1681        ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1682err_free_dp:
1683        release_net(ovs_dp_get_net(dp));
1684        kfree(dp);
1685err_unlock_ovs:
1686        ovs_unlock();
1687err:
1688        return err;
1689}
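/* Note the strict ordering above: every resource is torn down on the
 * error path in the exact reverse of the order it was acquired (local
 * port, ports array, per-cpu stats, flow table, netns reference, the
 * datapath itself), and the reply is built *before* the datapath is
 * published on ovs_net->dps, so a late allocation failure never leaves
 * a half-visible datapath behind.
 */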
1690
1691/* Called with ovs_mutex. */
1692static void __dp_destroy(struct datapath *dp)
1693{
1694        int i;
1695
1696        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1697                struct vport *vport;
1698                struct hlist_node *n;
1699
1700                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1701                        if (vport->port_no != OVSP_LOCAL)
1702                                ovs_dp_detach_port(vport);
1703        }
1704
1705        list_del(&dp->list_node);
1706
1707        /* OVSP_LOCAL is the datapath's internal port. Detach it last,
1708         * after all other ports have been destroyed and before the
1709         * datapath itself is freed. */
1710        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1711
1712        call_rcu(&dp->rcu, destroy_dp_rcu);
1713}
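/* call_rcu() above defers the actual freeing (destroy_dp_rcu) until
 * all in-flight RCU readers that might still hold a reference obtained
 * via get_dp() have finished: the datapath becomes unreachable as soon
 * as it is unlinked from ovs_net->dps and its ports are detached, but
 * its memory must outlive the current grace period.
 */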
1714
1715static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1716{
1717        struct sk_buff *reply;
1718        struct datapath *dp;
1719        int err;
1720
1721        ovs_lock();
1722        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1723        err = PTR_ERR(dp);
1724        if (IS_ERR(dp))
1725                goto unlock;
1726
1727        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1728                                      info->snd_seq, OVS_DP_CMD_DEL);
1729        err = PTR_ERR(reply);
1730        if (IS_ERR(reply))
1731                goto unlock;
1732
1733        __dp_destroy(dp);
1734        ovs_unlock();
1735
1736        ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1737
1738        return 0;
1739unlock:
1740        ovs_unlock();
1741        return err;
1742}
1743
1744static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1745{
1746        struct sk_buff *reply;
1747        struct datapath *dp;
1748        int err;
1749
1750        ovs_lock();
1751        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1752        err = PTR_ERR(dp);
1753        if (IS_ERR(dp))
1754                goto unlock;
1755
1756        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1757                                      info->snd_seq, OVS_DP_CMD_NEW);
1758        if (IS_ERR(reply)) {
1759                err = PTR_ERR(reply);
1760                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1761                                ovs_dp_datapath_multicast_group.id, err);
1762                err = 0;
1763                goto unlock;
1764        }
1765
1766        ovs_unlock();
1767        ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1768
1769        return 0;
1770unlock:
1771        ovs_unlock();
1772        return err;
1773}
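/* If the reply cannot be built, the set operation still "succeeds":
 * instead of returning the error to the caller, netlink_set_err()
 * pushes it to every listener of the datapath multicast group, so
 * userspace daemons learn they missed a notification and can re-query,
 * and err is reset to 0.
 */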
1774
1775static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1776{
1777        struct sk_buff *reply;
1778        struct datapath *dp;
1779        int err;
1780
1781        ovs_lock();
1782        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1783        if (IS_ERR(dp)) {
1784                err = PTR_ERR(dp);
1785                goto unlock;
1786        }
1787
1788        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1789                                      info->snd_seq, OVS_DP_CMD_NEW);
1790        if (IS_ERR(reply)) {
1791                err = PTR_ERR(reply);
1792                goto unlock;
1793        }
1794
1795        ovs_unlock();
1796        return genlmsg_reply(reply, info);
1797
1798unlock:
1799        ovs_unlock();
1800        return err;
1801}
1802
1803static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1804{
1805        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1806        struct datapath *dp;
1807        int skip = cb->args[0];
1808        int i = 0;
1809
1810        ovs_lock();
1811        list_for_each_entry(dp, &ovs_net->dps, list_node) {
1812                if (i >= skip &&
1813                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1814                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1815                                         OVS_DP_CMD_NEW) < 0)
1816                        break;
1817                i++;
1818        }
1819        ovs_unlock();
1820
1821        cb->args[0] = i;
1822
1823        return skb->len;
1824}
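/* Netlink dumps are restartable: the core calls this function
 * repeatedly until it returns no data, and cb->args[] is the cursor
 * that persists between calls. cb->args[0] holds the index of the
 * first datapath not yet emitted, so when one skb fills up the next
 * call skips everything already sent and resumes from the same
 * position in ovs_net->dps.
 */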
1825
1826static struct genl_ops dp_datapath_genl_ops[] = {
1827        { .cmd = OVS_DP_CMD_NEW,
1828          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1829          .policy = datapath_policy,
1830          .doit = ovs_dp_cmd_new
1831        },
1832        { .cmd = OVS_DP_CMD_DEL,
1833          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1834          .policy = datapath_policy,
1835          .doit = ovs_dp_cmd_del
1836        },
1837        { .cmd = OVS_DP_CMD_GET,
1838          .flags = 0,               /* OK for unprivileged users. */
1839          .policy = datapath_policy,
1840          .doit = ovs_dp_cmd_get,
1841          .dumpit = ovs_dp_cmd_dump
1842        },
1843        { .cmd = OVS_DP_CMD_SET,
1844          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1845          .policy = datapath_policy,
1846          .doit = ovs_dp_cmd_set,
1847        },
1848};
1849
1850static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1851        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1852        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1853        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1854        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1855        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1856        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1857};
1858
1859static struct genl_family dp_vport_genl_family = {
1860        .id = GENL_ID_GENERATE,
1861        .hdrsize = sizeof(struct ovs_header),
1862        .name = OVS_VPORT_FAMILY,
1863        .version = OVS_VPORT_VERSION,
1864        .maxattr = OVS_VPORT_ATTR_MAX,
1865        .netnsok = true,
1866        .parallel_ops = true,
1867};
1868
1869struct genl_multicast_group ovs_dp_vport_multicast_group = {
1870        .name = OVS_VPORT_MCGROUP
1871};
1872
1873/* Called with ovs_mutex or RCU read lock. */
1874static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1875                                   u32 portid, u32 seq, u32 flags, u8 cmd)
1876{
1877        struct ovs_header *ovs_header;
1878        struct ovs_vport_stats vport_stats;
1879        int err;
1880
1881        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1882                                 flags, cmd);
1883        if (!ovs_header)
1884                return -EMSGSIZE;
1885
1886        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1887
1888        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1889            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1890            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1891            nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
1892                goto nla_put_failure;
1893
1894        ovs_vport_get_stats(vport, &vport_stats);
1895        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1896                    &vport_stats))
1897                goto nla_put_failure;
1898
1899        err = ovs_vport_get_options(vport, skb);
1900        if (err == -EMSGSIZE)
1901                goto error;
1902
1903        return genlmsg_end(skb, ovs_header);
1904
1905nla_put_failure:
1906        err = -EMSGSIZE;
1907error:
1908        genlmsg_cancel(skb, ovs_header);
1909        return err;
1910}
1911
1912/* Called with ovs_mutex or RCU read lock. */
1913struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1914                                         u32 seq, u8 cmd)
1915{
1916        struct sk_buff *skb;
1917        int retval;
1918
1919        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1920        if (!skb)
1921                return ERR_PTR(-ENOMEM);
1922
1923        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1924        BUG_ON(retval < 0);
1925
1926        return skb;
1927}
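/* Unlike the datapath reply, the vport reply size is not precomputed
 * (options are variable length), so the code allocates a full
 * NLMSG_DEFAULT_SIZE skb and asserts with BUG_ON() that the fill
 * succeeded -- i.e. it assumes a single vport, including its options,
 * always fits in one default-sized message.
 */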
1928
1929/* Called with ovs_mutex or RCU read lock. */
1930static struct vport *lookup_vport(struct net *net,
1931                                  struct ovs_header *ovs_header,
1932                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1933{
1934        struct datapath *dp;
1935        struct vport *vport;
1936
1937        if (a[OVS_VPORT_ATTR_NAME]) {
1938                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1939                if (!vport)
1940                        return ERR_PTR(-ENODEV);
1941                if (ovs_header->dp_ifindex &&
1942                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1943                        return ERR_PTR(-ENODEV);
1944                return vport;
1945        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1946                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1947
1948                if (port_no >= DP_MAX_PORTS)
1949                        return ERR_PTR(-EFBIG);
1950
1951                dp = get_dp(net, ovs_header->dp_ifindex);
1952                if (!dp)
1953                        return ERR_PTR(-ENODEV);
1954
1955                vport = ovs_vport_ovsl_rcu(dp, port_no);
1956                if (!vport)
1957                        return ERR_PTR(-ENODEV);
1958                return vport;
1959        } else
1960                return ERR_PTR(-EINVAL);
1961}
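/* Vports, like datapaths, can be addressed by name or by number. The
 * name path ignores dp_ifindex unless it is nonzero, in which case the
 * named port must actually belong to that datapath; the number path
 * requires a valid dp_ifindex and bounds-checks the port number
 * (-EFBIG for anything >= DP_MAX_PORTS) before the bucket lookup.
 */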
1962
1963static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1964{
1965        struct nlattr **a = info->attrs;
1966        struct ovs_header *ovs_header = info->userhdr;
1967        struct vport_parms parms;
1968        struct sk_buff *reply;
1969        struct vport *vport;
1970        struct datapath *dp;
1971        u32 port_no;
1972        int err;
1973
1974        err = -EINVAL;
1975        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1976            !a[OVS_VPORT_ATTR_UPCALL_PID])
1977                goto exit;
1978
1979        ovs_lock();
1980        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1981        err = -ENODEV;
1982        if (!dp)
1983                goto exit_unlock;
1984
1985        if (a[OVS_VPORT_ATTR_PORT_NO]) {
1986                port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1987
1988                err = -EFBIG;
1989                if (port_no >= DP_MAX_PORTS)
1990                        goto exit_unlock;
1991
1992                vport = ovs_vport_ovsl(dp, port_no);
1993                err = -EBUSY;
1994                if (vport)
1995                        goto exit_unlock;
1996        } else {
1997                for (port_no = 1; ; port_no++) {
1998                        if (port_no >= DP_MAX_PORTS) {
1999                                err = -EFBIG;
2000                                goto exit_unlock;
2001                        }
2002                        vport = ovs_vport_ovsl(dp, port_no);
2003                        if (!vport)
2004                                break;
2005                }
2006        }
2007
2008        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2009        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2010        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2011        parms.dp = dp;
2012        parms.port_no = port_no;
2013        parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
2014
2015        vport = new_vport(&parms);
2016        err = PTR_ERR(vport);
2017        if (IS_ERR(vport))
2018                goto exit_unlock;
2019
2020        err = 0;
2021        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
2022                                         OVS_VPORT_CMD_NEW);
2023        if (IS_ERR(reply)) {
2024                err = PTR_ERR(reply);
2025                ovs_dp_detach_port(vport);
2026                goto exit_unlock;
2027        }
2028
2029        ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
2030
2031exit_unlock:
2032        ovs_unlock();
2033exit:
2034        return err;
2035}
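/* When userspace does not request a specific port number, the loop
 * above linearly scans for the first free slot starting at 1 -- port 0
 * is OVSP_LOCAL, permanently taken by the datapath's internal device
 * -- and fails with -EFBIG only once all DP_MAX_PORTS numbers are in
 * use.
 */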
2036
2037static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2038{
2039        struct nlattr **a = info->attrs;
2040        struct sk_buff *reply;
2041        struct vport *vport;
2042        int err;
2043
2044        ovs_lock();
2045        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2046        err = PTR_ERR(vport);
2047        if (IS_ERR(vport))
2048                goto exit_unlock;
2049
2050        if (a[OVS_VPORT_ATTR_TYPE] &&
2051            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2052                err = -EINVAL;
2053                goto exit_unlock;
2054        }
2055
2056        reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2057        if (!reply) {
2058                err = -ENOMEM;
2059                goto exit_unlock;
2060        }
2061
2062        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2063                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2064                if (err)
2065                        goto exit_free;
2066        }
2067
2068        if (a[OVS_VPORT_ATTR_UPCALL_PID])
2069                vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
2070
2071        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2072                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2073        BUG_ON(err < 0);
2074
2075        ovs_unlock();
2076        ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
2077        return 0;
2078
2079exit_free:
2080        kfree_skb(reply);
2081exit_unlock:
2082        ovs_unlock();
2083        return err;
2084}
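/* The reply skb is allocated *before* any state is modified. That
 * ordering matters: once ovs_vport_set_options() or the upcall PID
 * update has been applied there is no way to roll it back, so the only
 * failure still possible afterwards is the fill itself, which BUG_ON()
 * asserts cannot happen for a default-sized message.
 */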
2085
2086static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2087{
2088        struct nlattr **a = info->attrs;
2089        struct sk_buff *reply;
2090        struct vport *vport;
2091        int err;
2092
2093        ovs_lock();
2094        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2095        err = PTR_ERR(vport);
2096        if (IS_ERR(vport))
2097                goto exit_unlock;
2098
2099        if (vport->port_no == OVSP_LOCAL) {
2100                err = -EINVAL;
2101                goto exit_unlock;
2102        }
2103
2104        reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2105                                         info->snd_seq, OVS_VPORT_CMD_DEL);
2106        err = PTR_ERR(reply);
2107        if (IS_ERR(reply))
2108                goto exit_unlock;
2109
2110        err = 0;
2111        ovs_dp_detach_port(vport);
2112
2113        ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
2114
2115exit_unlock:
2116        ovs_unlock();
2117        return err;
2118}
2119
2120static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2121{
2122        struct nlattr **a = info->attrs;
2123        struct ovs_header *ovs_header = info->userhdr;
2124        struct sk_buff *reply;
2125        struct vport *vport;
2126        int err;
2127
2128        rcu_read_lock();
2129        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2130        err = PTR_ERR(vport);
2131        if (IS_ERR(vport))
2132                goto exit_unlock;
2133
2134        reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2135                                         info->snd_seq, OVS_VPORT_CMD_NEW);
2136        err = PTR_ERR(reply);
2137        if (IS_ERR(reply))
2138                goto exit_unlock;
2139
2140        rcu_read_unlock();
2141
2142        return genlmsg_reply(reply, info);
2143
2144exit_unlock:
2145        rcu_read_unlock();
2146        return err;
2147}
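/* This is the one vport operation that runs entirely under
 * rcu_read_lock() rather than ovs_lock: it only reads, so it can run
 * concurrently with datapath modifications. That is also why
 * ovs_vport_cmd_build_info() allocates with GFP_ATOMIC -- sleeping is
 * not allowed inside an RCU read-side critical section.
 */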
2148
2149static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2150{
2151        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2152        struct datapath *dp;
2153        int bucket = cb->args[0], skip = cb->args[1];
2154        int i, j = 0;
2155
2156        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2157        if (!dp)
2158                return -ENODEV;
2159
2160        rcu_read_lock();
2161        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2162                struct vport *vport;
2163
2164                j = 0;
2165                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2166                        if (j >= skip &&
2167                            ovs_vport_cmd_fill_info(vport, skb,
2168                                                    NETLINK_CB(cb->skb).portid,
2169                                                    cb->nlh->nlmsg_seq,
2170                                                    NLM_F_MULTI,
2171                                                    OVS_VPORT_CMD_NEW) < 0)
2172                                goto out;
2173
2174                        j++;
2175                }
2176                skip = 0;
2177        }
2178out:
2179        rcu_read_unlock();
2180
2181        cb->args[0] = i;
2182        cb->args[1] = j;
2183
2184        return skb->len;
2185}
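/* Vport dumps need a two-dimensional cursor because ports live in a
 * hash table: cb->args[0] is the bucket index and cb->args[1] the
 * offset within that bucket. As usual for RCU hash-table walks spread
 * over multiple netlink calls, entries that move while the dump is in
 * progress may be missed or reported twice.
 */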
2186
2187static struct genl_ops dp_vport_genl_ops[] = {
2188        { .cmd = OVS_VPORT_CMD_NEW,
2189          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2190          .policy = vport_policy,
2191          .doit = ovs_vport_cmd_new
2192        },
2193        { .cmd = OVS_VPORT_CMD_DEL,
2194          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2195          .policy = vport_policy,
2196          .doit = ovs_vport_cmd_del
2197        },
2198        { .cmd = OVS_VPORT_CMD_GET,
2199          .flags = 0,               /* OK for unprivileged users. */
2200          .policy = vport_policy,
2201          .doit = ovs_vport_cmd_get,
2202          .dumpit = ovs_vport_cmd_dump
2203        },
2204        { .cmd = OVS_VPORT_CMD_SET,
2205          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2206          .policy = vport_policy,
2207          .doit = ovs_vport_cmd_set,
2208        },
2209};
2210
2211struct genl_family_and_ops {
2212        struct genl_family *family;
2213        struct genl_ops *ops;
2214        int n_ops;
2215        struct genl_multicast_group *group;
2216};
2217
2218static const struct genl_family_and_ops dp_genl_families[] = {
2219        { &dp_datapath_genl_family,
2220          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
2221          &ovs_dp_datapath_multicast_group },
2222        { &dp_vport_genl_family,
2223          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
2224          &ovs_dp_vport_multicast_group },
2225        { &dp_flow_genl_family,
2226          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
2227          &ovs_dp_flow_multicast_group },
2228        { &dp_packet_genl_family,
2229          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
2230          NULL },
2231};
2232
2233static void dp_unregister_genl(int n_families)
2234{
2235        int i;
2236
2237        for (i = 0; i < n_families; i++)
2238                genl_unregister_family(dp_genl_families[i].family);
2239}
2240
2241static int dp_register_genl(void)
2242{
2243        int n_registered;
2244        int err;
2245        int i;
2246
2247        n_registered = 0;
2248        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2249                const struct genl_family_and_ops *f = &dp_genl_families[i];
2250
2251                err = genl_register_family_with_ops(f->family, f->ops,
2252                                                    f->n_ops);
2253                if (err)
2254                        goto error;
2255                n_registered++;
2256
2257                if (f->group) {
2258                        err = genl_register_mc_group(f->family, f->group);
2259                        if (err)
2260                                goto error;
2261                }
2262        }
2263
2264        return 0;
2265
2266error:
2267        dp_unregister_genl(n_registered);
2268        return err;
2269}
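/* Registration is table driven and failure is handled by unwinding:
 * n_registered counts fully registered families, and any error --
 * including a multicast-group registration failing after its family
 * succeeded -- falls through to dp_unregister_genl(n_registered),
 * which also tears down the family whose group could not be added.
 */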
2270
2271static void rehash_flow_table(struct work_struct *work)
2272{
2273        struct datapath *dp;
2274        struct net *net;
2275
2276        ovs_lock();
2277        rtnl_lock();
2278        for_each_net(net) {
2279                struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2280
2281                list_for_each_entry(dp, &ovs_net->dps, list_node) {
2282                        struct flow_table *old_table = ovsl_dereference(dp->table);
2283                        struct flow_table *new_table;
2284
2285                        new_table = ovs_flow_tbl_rehash(old_table);
2286                        if (!IS_ERR(new_table)) {
2287                                rcu_assign_pointer(dp->table, new_table);
2288                                ovs_flow_tbl_deferred_destroy(old_table);
2289                        }
2290                }
2291        }
2292        rtnl_unlock();
2293        ovs_unlock();
2294        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2295}
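/* Every REHASH_FLOW_INTERVAL (ten minutes) each datapath's flow table
 * is rebuilt: ovs_flow_tbl_rehash() redistributes the entries into a
 * fresh table so no bucket distribution persists indefinitely. The
 * swap is the standard RCU pattern -- publish the new table with
 * rcu_assign_pointer(), then let ovs_flow_tbl_deferred_destroy() free
 * the old one after a grace period, so concurrent lookups never see a
 * torn table.
 */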
2296
2297static int __net_init ovs_init_net(struct net *net)
2298{
2299        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2300
2301        INIT_LIST_HEAD(&ovs_net->dps);
2302        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2303        return 0;
2304}
2305
2306static void __net_exit ovs_exit_net(struct net *net)
2307{
2308        struct datapath *dp, *dp_next;
2309        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2310
2311        ovs_lock();
2312        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2313                __dp_destroy(dp);
2314        ovs_unlock();
2315
2316        cancel_work_sync(&ovs_net->dp_notify_work);
2317}
2318
2319static struct pernet_operations ovs_net_ops = {
2320        .init = ovs_init_net,
2321        .exit = ovs_exit_net,
2322        .id   = &ovs_net_id,
2323        .size = sizeof(struct ovs_net),
2324};
2325
2326static int __init dp_init(void)
2327{
2328        int err;
2329
2330        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2331
2332        pr_info("Open vSwitch switching datapath\n");
2333
2334        err = ovs_flow_init();
2335        if (err)
2336                goto error;
2337
2338        err = ovs_vport_init();
2339        if (err)
2340                goto error_flow_exit;
2341
2342        err = register_pernet_device(&ovs_net_ops);
2343        if (err)
2344                goto error_vport_exit;
2345
2346        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2347        if (err)
2348                goto error_netns_exit;
2349
2350        err = dp_register_genl();
2351        if (err < 0)
2352                goto error_unreg_notifier;
2353
2354        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2355
2356        return 0;
2357
2358error_unreg_notifier:
2359        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2360error_netns_exit:
2361        unregister_pernet_device(&ovs_net_ops);
2362error_vport_exit:
2363        ovs_vport_exit();
2364error_flow_exit:
2365        ovs_flow_exit();
2366error:
2367        return err;
2368}
2369
2370static void dp_cleanup(void)
2371{
2372        cancel_delayed_work_sync(&rehash_flow_wq);
2373        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2374        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2375        unregister_pernet_device(&ovs_net_ops);
2376        rcu_barrier();
2377        ovs_vport_exit();
2378        ovs_flow_exit();
2379}
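/* Teardown mirrors dp_init() in reverse, with one addition:
 * rcu_barrier() waits for every outstanding call_rcu() callback
 * (deferred datapath and flow-table frees) to finish before
 * ovs_vport_exit()/ovs_flow_exit() destroy the caches those callbacks
 * still free into.
 */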
2380
2381module_init(dp_init);
2382module_exit(dp_cleanup);
2383
2384MODULE_DESCRIPTION("Open vSwitch switching datapath");
2385MODULE_LICENSE("GPL");
2386