linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"


#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

int ovs_net_id __read_mostly;

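/* Multicast a notification built for 'info' to the given genl group,
 * resolving the net namespace from the request that triggered it.
 */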
static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
                       struct genl_multicast_group *grp)
{
        genl_notify(skb, genl_info_net(info), info->snd_portid,
                    grp->id, info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath or port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
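
/*
 * Illustrative sketch of the protocol above (editor's example, not part of
 * the upstream file): writers bracket their modifications with ovs_lock(),
 * while readers only need an RCU read-side critical section:
 *
 *	ovs_lock();
 *	... add/remove ports, replace dp->table, ...
 *	ovs_unlock();
 *
 *	rcu_read_lock();
 *	dp = get_dp(net, dp_ifindex);	// RCU-protected lookup
 *	... read-only use of dp ...
 *	rcu_read_unlock();
 */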

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct net *, int dp_ifindex,
                                  struct sk_buff *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, dp_ifindex);
        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = netdev_vport_priv(local)->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

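/* Map a port number to its bucket in dp->ports.  DP_VPORT_HASH_BUCKETS is a
 * power of two (1024 in datapath.h), so the mask below is a cheap modulo;
 * e.g. with 1024 buckets, port_no 1025 lands in bucket 1.
 */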
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
        struct sw_flow_key key;
        u64 *stats_counter;
        int error;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Extract flow from 'skb' into 'key'. */
        error = ovs_flow_extract(skb, p->port_no, &key);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
        }

        /* Look up flow. */
        flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.key = &key;
                upcall.userdata = NULL;
                upcall.portid = p->upcall_portid;
                ovs_dp_upcall(dp, skb, &upcall);
                consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        OVS_CB(skb)->flow = flow;
        OVS_CB(skb)->pkt_key = &key;

        stats_counter = &stats->n_hit;
        ovs_flow_used(OVS_CB(skb)->flow, skb);
        ovs_execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->sync);
        (*stats_counter)++;
        u64_stats_update_end(&stats->sync);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
};

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int dp_ifindex;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex) {
                err = -ENODEV;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        else
                err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->sync);
        stats->n_lost++;
        u64_stats_update_end(&stats->sync);

        return err;
}

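/* Upcall path for GSO skbs: software-segment the packet and queue each
 * resulting segment to userspace individually.  For UDP fragments, segments
 * after the first are re-labelled OVS_FRAG_TYPE_LATER, since the extracted
 * key only describes the first fragment.
 */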
static int queue_gso_packets(struct net *net, int dp_ifindex,
                             struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
                if (err)
                        break;

                if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for a first fragment, so we need to
                         * properly mark later fragments.
                         */
                        later_key = *upcall_info->key;
                        later_key.ip.frag = OVS_FRAG_TYPE_LATER;

                        later_info = *upcall_info;
                        later_info.key = &later_key;
                        upcall_info = &later_info;
                }
        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

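/* Worst-case size of a fully populated flow key when serialized as netlink
 * attributes; used to size upcall and flow-dump messages so that a key
 * always fits.  Indented entries nest inside OVS_KEY_ATTR_TUNNEL.
 */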
static size_t key_attr_size(void)
{
        return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
                + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
                  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
                  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
                  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
                + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
                + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
                + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
                + nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static size_t upcall_msg_size(const struct sk_buff *skb,
                              const struct nlattr *userdata)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (userdata)
                size += NLA_ALIGN(userdata->nla_len);

        return size;
}

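/* Build a single OVS_PACKET upcall message and unicast it to the netlink
 * port in 'upcall_info': any offloaded VLAN tag is first pushed back into
 * the packet data, then the flow key, optional userdata and packet payload
 * are attached as attributes.
 */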
static int queue_userspace_packet(struct net *net, int dp_ifindex,
                                  struct sk_buff *skb,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb; /* to be queued to userspace */
        struct nlattr *nla;
        int err;

        if (vlan_tx_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                nskb->vlan_tci = 0;
                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
        ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

        skb_copy_and_csum_dev(skb, nla_data(nla));

        genlmsg_end(user_skb, upcall);
        err = genlmsg_unicast(net, user_skb, upcall_info->portid);

out:
        kfree_skb(nskb);
        return err;
}

/* Called with ovs_mutex. */
static int flush_flows(struct datapath *dp)
{
        struct flow_table *old_table;
        struct flow_table *new_table;

        old_table = ovsl_dereference(dp->table);
        new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        ovs_flow_tbl_destroy(old_table, true);
        return 0;
}

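/* Reserve 'attr_len' bytes at the tail of the action list in *sfa, growing
 * the buffer when needed.  Growth doubles the current ksize() allocation,
 * capped at MAX_ACTIONS_BUFSIZE; e.g. a 256-byte buffer that cannot fit the
 * request is reallocated to 512 bytes and the old actions are copied over.
 */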
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
{
        struct sw_flow_actions *acts;
        int new_acts_size;
        int req_size = NLA_ALIGN(attr_len);
        int next_offset = offsetof(struct sw_flow_actions, actions) +
                                        (*sfa)->actions_len;

        if (req_size <= (ksize(*sfa) - next_offset))
                goto out;

        new_acts_size = ksize(*sfa) * 2;

        if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
                if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
                        return ERR_PTR(-EMSGSIZE);
                new_acts_size = MAX_ACTIONS_BUFSIZE;
        }

        acts = ovs_flow_actions_alloc(new_acts_size);
        if (IS_ERR(acts))
                return (void *)acts;

        memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
        acts->actions_len = (*sfa)->actions_len;
        kfree(*sfa);
        *sfa = acts;

out:
        (*sfa)->actions_len += req_size;
        return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}

static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
{
        struct nlattr *a;

        a = reserve_sfa_size(sfa, nla_attr_size(len));
        if (IS_ERR(a))
                return PTR_ERR(a);

        a->nla_type = attrtype;
        a->nla_len = nla_attr_size(len);

        if (data)
                memcpy(nla_data(a), data, len);
        memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

        return 0;
}

static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
{
        int used = (*sfa)->actions_len;
        int err;

        err = add_action(sfa, attrtype, NULL, 0);
        if (err)
                return err;

        return used;
}

static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
{
        struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);

        a->nla_len = sfa->actions_len - st_offset;
}

static int validate_and_copy_actions(const struct nlattr *attr,
                                     const struct sw_flow_key *key, int depth,
                                     struct sw_flow_actions **sfa);

static int validate_and_copy_sample(const struct nlattr *attr,
                                    const struct sw_flow_key *key, int depth,
                                    struct sw_flow_actions **sfa)
{
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
        const struct nlattr *a;
        int rem, start, err, st_acts;

        memset(attrs, 0, sizeof(attrs));
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
                        return -EINVAL;
                attrs[type] = a;
        }
        if (rem)
                return -EINVAL;

        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
        if (!probability || nla_len(probability) != sizeof(u32))
                return -EINVAL;

        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
                return -EINVAL;

        /* validation done, copy sample action. */
        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
        if (start < 0)
                return start;
        err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
        if (err)
                return err;
        st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
        if (st_acts < 0)
                return st_acts;

        err = validate_and_copy_actions(actions, key, depth + 1, sfa);
        if (err)
                return err;

        add_nested_action_end(*sfa, st_acts);
        add_nested_action_end(*sfa, start);

        return 0;
}

static int validate_tp_port(const struct sw_flow_key *flow_key)
{
        if (flow_key->eth.type == htons(ETH_P_IP)) {
                if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
                        return 0;
        } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
                if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
                        return 0;
        }

        return -EINVAL;
}

static int validate_and_copy_set_tun(const struct nlattr *attr,
                                     struct sw_flow_actions **sfa)
{
        struct sw_flow_match match;
        struct sw_flow_key key;
        int err, start;

        ovs_match_init(&match, &key, NULL);
        err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
        if (err)
                return err;

        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
        if (start < 0)
                return start;

        err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
                        sizeof(match.key->tun_key));
        add_nested_action_end(*sfa, start);

        return err;
}

static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key,
                        struct sw_flow_actions **sfa,
                        bool *set_tun)
{
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);

        /* There can be only one key in an action. */
        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
                return -EINVAL;

        if (key_type > OVS_KEY_ATTR_MAX ||
           (ovs_key_lens[key_type] != nla_len(ovs_key) &&
            ovs_key_lens[key_type] != -1))
                return -EINVAL;

        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;
        const struct ovs_key_ipv6 *ipv6_key;
        int err;

        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_SKB_MARK:
        case OVS_KEY_ATTR_ETHERNET:
                break;

        case OVS_KEY_ATTR_TUNNEL:
                *set_tun = true;
                err = validate_and_copy_set_tun(a, sfa);
                if (err)
                        return err;
                break;

        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv4_key = nla_data(ovs_key);
                if (ipv4_key->ipv4_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv4_key->ipv4_frag != flow_key->ip.frag)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_IPV6:
                if (flow_key->eth.type != htons(ETH_P_IPV6))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv6_key = nla_data(ovs_key);
                if (ipv6_key->ipv6_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv6_key->ipv6_frag != flow_key->ip.frag)
                        return -EINVAL;

                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_TCP:
                if (flow_key->ip.proto != IPPROTO_TCP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        case OVS_KEY_ATTR_SCTP:
                if (flow_key->ip.proto != IPPROTO_SCTP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        default:
                return -EINVAL;
        }

        return 0;
}

static int validate_userspace(const struct nlattr *attr)
{
        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
        };
        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
        int error;

        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
                                 attr, userspace_policy);
        if (error)
                return error;

        if (!a[OVS_USERSPACE_ATTR_PID] ||
            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
                return -EINVAL;

        return 0;
}

static int copy_action(const struct nlattr *from,
                       struct sw_flow_actions **sfa)
{
        int totlen = NLA_ALIGN(from->nla_len);
        struct nlattr *to;

        to = reserve_sfa_size(sfa, from->nla_len);
        if (IS_ERR(to))
                return PTR_ERR(to);

        memcpy(to, from, totlen);
        return 0;
}

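/* Validate a nested list of actions against the flow key they will apply
 * to, copying them into *sfa as they are checked.  SET and SAMPLE actions
 * may be rewritten while being copied, so those set 'skip_copy' and bypass
 * the generic copy_action() at the bottom of the loop.
 */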
static int validate_and_copy_actions(const struct nlattr *attr,
                                     const struct sw_flow_key *key,
                                     int depth,
                                     struct sw_flow_actions **sfa)
{
        const struct nlattr *a;
        int rem, err;

        if (depth >= SAMPLE_ACTION_DEPTH)
                return -EOVERFLOW;

        nla_for_each_nested(a, attr, rem) {
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);
                bool skip_copy;

                if (type > OVS_ACTION_ATTR_MAX ||
                    (action_lens[type] != nla_len(a) &&
                     action_lens[type] != (u32)-1))
                        return -EINVAL;

                skip_copy = false;
                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case OVS_ACTION_ATTR_USERSPACE:
                        err = validate_userspace(a);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        vlan = nla_data(a);
                        if (vlan->vlan_tpid != htons(ETH_P_8021Q))
                                return -EINVAL;
                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key, sfa, &skip_copy);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = validate_and_copy_sample(a, key, depth, sfa);
                        if (err)
                                return err;
                        skip_copy = true;
                        break;

                default:
                        return -EINVAL;
                }
                if (!skip_copy) {
                        err = copy_action(a, sfa);
                        if (err)
                                return err;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

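/* OVS_PACKET_CMD_EXECUTE handler: rebuild an skb from the userspace-supplied
 * packet data, attach a one-off flow built from the given key and actions,
 * and execute those actions against the named datapath as if the packet had
 * been received on a port.
 */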
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        int len;
        int err;

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_extract(packet, -1, &flow->key);
        if (err)
                goto err_flow_free;

        err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;
        acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;

        err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
        rcu_assign_pointer(flow->sf_acts, acts);
        if (err)
                goto err_flow_free;

        OVS_CB(packet)->flow = flow;
        OVS_CB(packet)->pkt_key = &flow->key;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        local_bh_disable();
        err = ovs_execute_actions(dp, packet);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

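/* Aggregate per-CPU datapath stats into 'stats'.  Each per-CPU copy is read
 * under its u64_stats seqcount: the fetch retries if a writer raced with
 * us, keeping 64-bit counters consistent even on 32-bit hosts.
 */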
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
        struct flow_table *table;
        int i;

        table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
        stats->n_flows = ovs_flow_tbl_count(table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
};

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

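/* The next three helpers are the inverse of the validate_and_copy_*()
 * functions above: they convert the kernel's internal copy of an action
 * list back into the netlink attribute format expected by userspace when
 * flows are dumped.
 */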
static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);

static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
        const struct nlattr *a;
        struct nlattr *start;
        int err = 0, rem;

        start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
        if (!start)
                return -EMSGSIZE;

        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                struct nlattr *st_sample;

                switch (type) {
                case OVS_SAMPLE_ATTR_PROBABILITY:
                        if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
                                return -EMSGSIZE;
                        break;
                case OVS_SAMPLE_ATTR_ACTIONS:
                        st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
                        if (!st_sample)
                                return -EMSGSIZE;
                        err = actions_to_attr(nla_data(a), nla_len(a), skb);
                        if (err)
                                return err;
                        nla_nest_end(skb, st_sample);
                        break;
                }
        }

        nla_nest_end(skb, start);
        return err;
}

static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);
        struct nlattr *start;
        int err;

        switch (key_type) {
        case OVS_KEY_ATTR_IPV4_TUNNEL:
                start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
                if (!start)
                        return -EMSGSIZE;

                err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
                                             nla_data(ovs_key));
                if (err)
                        return err;
                nla_nest_end(skb, start);
                break;
        default:
                if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
                        return -EMSGSIZE;
                break;
        }

        return 0;
}

static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
{
        const struct nlattr *a;
        int rem, err;

        nla_for_each_attr(a, attr, len, rem) {
                int type = nla_type(a);

                switch (type) {
                case OVS_ACTION_ATTR_SET:
                        err = set_action_to_attr(a, skb);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = sample_action_to_attr(a, skb);
                        if (err)
                                return err;
                        break;
                default:
                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
                                return -EMSGSIZE;
                        break;
                }
        }

        return 0;
}

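/* Worst-case reply size for a single flow: header plus key, mask, stats,
 * TCP flags, last-used timestamp and the flow's action list.
 */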
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
        return NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
                + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
                + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
}

/* Called with ovs_mutex. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        struct nlattr *start;
        struct ovs_flow_stats stats;
        struct ovs_header *ovs_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        /* Fill flow key. */
        nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;

        err = ovs_flow_to_nlattrs(&flow->unmasked_key,
                        &flow->unmasked_key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
        if (!nla)
                goto nla_put_failure;

        err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
        if (err)
                goto error;

        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                goto nla_put_failure;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS,
                    sizeof(struct ovs_flow_stats), &stats))
                goto nla_put_failure;

        if (tcp_flags &&
            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
                goto nla_put_failure;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_check(flow->sf_acts,
                                                lockdep_ovsl_is_held());

                err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                goto error;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len)
                goto nla_put_failure;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;

        sf_acts = ovsl_dereference(flow->sf_acts);

        return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
                                               struct datapath *dp,
                                               u32 portid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
        BUG_ON(retval < 0);
        return skb;
}

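/* Shared handler for OVS_FLOW_CMD_NEW and OVS_FLOW_CMD_SET: validates the
 * key, mask and actions, then either inserts a new flow (expanding the flow
 * table if necessary) or swaps the action list on an existing one.
 */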
static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key, masked_key;
        struct sw_flow *flow = NULL;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct flow_table *table;
        struct sw_flow_actions *acts = NULL;
        struct sw_flow_match match;
        int error;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY])
                goto error;

        ovs_match_init(&match, &key, &mask);
        error = ovs_match_from_nlattrs(&match,
                        a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error;

                ovs_flow_key_mask(&masked_key, &key, &mask);
                error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
                                                  &masked_key, 0, &acts);
                if (error) {
                        OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
                        goto err_kfree;
                }
        } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto err_unlock_ovs;

        table = ovsl_dereference(dp->table);

        /* Check if this is a duplicate flow. */
        flow = ovs_flow_lookup(table, &key);
        if (!flow) {
                struct sw_flow_mask *mask_p;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
                        goto err_unlock_ovs;

                /* Expand table, if necessary, to make room. */
                if (ovs_flow_tbl_need_to_expand(table)) {
                        struct flow_table *new_table;

                        new_table = ovs_flow_tbl_expand(table);
                        if (!IS_ERR(new_table)) {
                                rcu_assign_pointer(dp->table, new_table);
                                ovs_flow_tbl_destroy(table, true);
                                table = ovsl_dereference(dp->table);
                        }
                }

                /* Allocate flow. */
                flow = ovs_flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto err_unlock_ovs;
                }
                clear_stats(flow);

                flow->key = masked_key;
                flow->unmasked_key = key;

                /* Make sure the mask is unique in the system. */
                mask_p = ovs_sw_flow_mask_find(table, &mask);
                if (!mask_p) {
                        /* Allocate a new mask if none exists. */
                        mask_p = ovs_sw_flow_mask_alloc();
                        if (!mask_p)
                                goto err_flow_free;
                        mask_p->key = mask.key;
                        mask_p->range = mask.range;
                        ovs_sw_flow_mask_insert(table, mask_p);
                }

                ovs_sw_flow_mask_add_ref(mask_p);
                flow->mask = mask_p;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                ovs_flow_insert(table, flow);

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                                info->snd_seq, OVS_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto err_unlock_ovs;

                /* The unmasked key has to be the same for flow updates. */
                error = -EINVAL;
                if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
                        OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
                        goto err_unlock_ovs;
                }

                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);
                ovs_flow_deferred_free_acts(old_acts);

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                               info->snd_seq, OVS_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[OVS_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }
        ovs_unlock();

        if (!IS_ERR(reply))
                ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
        else
                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
                                ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

err_flow_free:
        ovs_flow_free(flow, false);
err_unlock_ovs:
        ovs_unlock();
err_kfree:
        kfree(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        struct sw_flow_match match;
        int err;

        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
                return -EINVAL;
        }

        ovs_match_init(&match, &key, NULL);
        err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        table = ovsl_dereference(dp->table);
        flow = ovs_flow_lookup_unmasked_key(table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                        info->snd_seq, OVS_FLOW_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        struct sw_flow_match match;
        int err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (!a[OVS_FLOW_ATTR_KEY]) {
                err = flush_flows(dp);
                goto unlock;
        }

        ovs_match_init(&match, &key, NULL);
        err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
        if (err)
                goto unlock;

        table = ovsl_dereference(dp->table);
        flow = ovs_flow_lookup_unmasked_key(table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_alloc_info(flow);
        if (!reply) {
                err = -ENOMEM;
                goto unlock;
        }

        ovs_flow_remove(table, flow);

        err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
                                     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        ovs_flow_free(flow, true);
        ovs_unlock();

        ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

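/* Flow dump callback: cb->args[0]/[1] carry the bucket and in-bucket offset
 * of the last flow dumped, so successive dump calls resume where the
 * previous message filled up.
 */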
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        struct flow_table *table;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        table = rcu_dereference(dp->table);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_dump_next(table, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, dp, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

1543static struct genl_ops dp_flow_genl_ops[] = {
1544        { .cmd = OVS_FLOW_CMD_NEW,
1545          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1546          .policy = flow_policy,
1547          .doit = ovs_flow_cmd_new_or_set
1548        },
1549        { .cmd = OVS_FLOW_CMD_DEL,
1550          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1551          .policy = flow_policy,
1552          .doit = ovs_flow_cmd_del
1553        },
1554        { .cmd = OVS_FLOW_CMD_GET,
1555          .flags = 0,               /* OK for unprivileged users. */
1556          .policy = flow_policy,
1557          .doit = ovs_flow_cmd_get,
1558          .dumpit = ovs_flow_cmd_dump
1559        },
1560        { .cmd = OVS_FLOW_CMD_SET,
1561          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1562          .policy = flow_policy,
1563          .doit = ovs_flow_cmd_new_or_set,
1564        },
1565};
1566
1567static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1568        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1569        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1570};
1571
1572static struct genl_family dp_datapath_genl_family = {
1573        .id = GENL_ID_GENERATE,
1574        .hdrsize = sizeof(struct ovs_header),
1575        .name = OVS_DATAPATH_FAMILY,
1576        .version = OVS_DATAPATH_VERSION,
1577        .maxattr = OVS_DP_ATTR_MAX,
1578        .netnsok = true,
1579        .parallel_ops = true,
1580};
1581
1582static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1583        .name = OVS_DATAPATH_MCGROUP
1584};
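    /* For illustration only: a minimal userspace sketch (not part of this
     * file) of creating a datapath over this family with libnl-3.  The
     * datapath name "dp0" and the choice of libnl are assumptions;
     * ovs-vswitchd performs the equivalent steps itself.
     *
     *    #include <netlink/netlink.h>
     *    #include <netlink/genl/genl.h>
     *    #include <netlink/genl/ctrl.h>
     *    #include <linux/openvswitch.h>
     *
     *    static int create_dp(void)
     *    {
     *            struct nl_sock *sk = nl_socket_alloc();
     *            struct nl_msg *msg;
     *            struct ovs_header *hdr;
     *            int family;
     *
     *            genl_connect(sk);
     *            family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
     *
     *            msg = nlmsg_alloc();
     *            hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
     *                              sizeof(*hdr), NLM_F_REQUEST | NLM_F_ACK,
     *                              OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
     *            hdr->dp_ifindex = 0;    // not used by OVS_DP_CMD_NEW
     *            nla_put_string(msg, OVS_DP_ATTR_NAME, "dp0");
     *            nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID, 0); // no upcall listener
     *            return nl_send_auto(sk, msg);    // error handling omitted
     *    }
     */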
1585
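    /* Upper bound on the size of an OVS_DP_CMD_* reply: the family header
     * plus one IFNAMSIZ name attribute and one ovs_dp_stats attribute.
     * Used to size the skb allocated in ovs_dp_cmd_build_info().
     */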
1586static size_t ovs_dp_cmd_msg_size(void)
1587{
1588        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1589
1590        msgsize += nla_total_size(IFNAMSIZ);
1591        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1592
1593        return msgsize;
1594}
1595
1596static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1597                                u32 portid, u32 seq, u32 flags, u8 cmd)
1598{
1599        struct ovs_header *ovs_header;
1600        struct ovs_dp_stats dp_stats;
1601        int err;
1602
1603        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1604                                 flags, cmd);
1605        if (!ovs_header)
1606                goto error;
1607
1608        ovs_header->dp_ifindex = get_dpifindex(dp);
1609
1610        rcu_read_lock();
1611        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1612        rcu_read_unlock();
1613        if (err)
1614                goto nla_put_failure;
1615
1616        get_dp_stats(dp, &dp_stats);
1617        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
1618                goto nla_put_failure;
1619
1620        return genlmsg_end(skb, ovs_header);
1621
1622nla_put_failure:
1623        genlmsg_cancel(skb, ovs_header);
1624error:
1625        return -EMSGSIZE;
1626}
1627
1628static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1629                                             u32 seq, u8 cmd)
1630{
1631        struct sk_buff *skb;
1632        int retval;
1633
1634        skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1635        if (!skb)
1636                return ERR_PTR(-ENOMEM);
1637
1638        retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
1639        if (retval < 0) {
1640                kfree_skb(skb);
1641                return ERR_PTR(retval);
1642        }
1643        return skb;
1644}
1645
1646/* Called with ovs_mutex. */
1647static struct datapath *lookup_datapath(struct net *net,
1648                                        struct ovs_header *ovs_header,
1649                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1650{
1651        struct datapath *dp;
1652
1653        if (!a[OVS_DP_ATTR_NAME])
1654                dp = get_dp(net, ovs_header->dp_ifindex);
1655        else {
1656                struct vport *vport;
1657
1658                rcu_read_lock();
1659                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1660                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1661                rcu_read_unlock();
1662        }
1663        return dp ? dp : ERR_PTR(-ENODEV);
1664}
1665
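    /* OVS_DP_CMD_NEW: allocate the datapath, its flow table, per-CPU stats
     * and vport hash table, then attach the OVSP_LOCAL internal port that
     * gives the datapath its network device.  The error labels unwind the
     * construction in reverse order.
     */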
1666static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1667{
1668        struct nlattr **a = info->attrs;
1669        struct vport_parms parms;
1670        struct sk_buff *reply;
1671        struct datapath *dp;
1672        struct vport *vport;
1673        struct ovs_net *ovs_net;
1674        int err, i;
1675
1676        err = -EINVAL;
1677        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1678                goto err;
1679
1680        ovs_lock();
1681
1682        err = -ENOMEM;
1683        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1684        if (dp == NULL)
1685                goto err_unlock_ovs;
1686
1687        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1688
1689        /* Allocate table. */
1690        err = -ENOMEM;
1691        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
1692        if (!dp->table)
1693                goto err_free_dp;
1694
1695        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1696        if (!dp->stats_percpu) {
1697                err = -ENOMEM;
1698                goto err_destroy_table;
1699        }
1700
1701        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1702                            GFP_KERNEL);
1703        if (!dp->ports) {
1704                err = -ENOMEM;
1705                goto err_destroy_percpu;
1706        }
1707
1708        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1709                INIT_HLIST_HEAD(&dp->ports[i]);
1710
1711        /* Set up our datapath device. */
1712        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1713        parms.type = OVS_VPORT_TYPE_INTERNAL;
1714        parms.options = NULL;
1715        parms.dp = dp;
1716        parms.port_no = OVSP_LOCAL;
1717        parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1718
1719        vport = new_vport(&parms);
1720        if (IS_ERR(vport)) {
1721                err = PTR_ERR(vport);
1722                if (err == -EBUSY)
1723                        err = -EEXIST;
1724
1725                goto err_destroy_ports_array;
1726        }
1727
1728        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1729                                      info->snd_seq, OVS_DP_CMD_NEW);
1730        err = PTR_ERR(reply);
1731        if (IS_ERR(reply))
1732                goto err_destroy_local_port;
1733
1734        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1735        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1736
1737        ovs_unlock();
1738
1739        ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1740        return 0;
1741
1742err_destroy_local_port:
1743        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1744err_destroy_ports_array:
1745        kfree(dp->ports);
1746err_destroy_percpu:
1747        free_percpu(dp->stats_percpu);
1748err_destroy_table:
1749        ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
1750err_free_dp:
1751        release_net(ovs_dp_get_net(dp));
1752        kfree(dp);
1753err_unlock_ovs:
1754        ovs_unlock();
1755err:
1756        return err;
1757}
1758
1759/* Called with ovs_mutex. */
1760static void __dp_destroy(struct datapath *dp)
1761{
1762        int i;
1763
1764        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1765                struct vport *vport;
1766                struct hlist_node *n;
1767
1768                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1769                        if (vport->port_no != OVSP_LOCAL)
1770                                ovs_dp_detach_port(vport);
1771        }
1772
1773        list_del_rcu(&dp->list_node);
1774
1775        /* OVSP_LOCAL is the datapath's internal port.  Make sure all ports
1776         * in the datapath are destroyed before the datapath itself is freed.
1777         */
1778        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1779
1780        call_rcu(&dp->rcu, destroy_dp_rcu);
1781}
1782
1783static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1784{
1785        struct sk_buff *reply;
1786        struct datapath *dp;
1787        int err;
1788
1789        ovs_lock();
1790        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1791        err = PTR_ERR(dp);
1792        if (IS_ERR(dp))
1793                goto unlock;
1794
1795        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1796                                      info->snd_seq, OVS_DP_CMD_DEL);
1797        err = PTR_ERR(reply);
1798        if (IS_ERR(reply))
1799                goto unlock;
1800
1801        __dp_destroy(dp);
1802        ovs_unlock();
1803
1804        ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1805
1806        return 0;
1807unlock:
1808        ovs_unlock();
1809        return err;
1810}
1811
1812static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1813{
1814        struct sk_buff *reply;
1815        struct datapath *dp;
1816        int err;
1817
1818        ovs_lock();
1819        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1820        err = PTR_ERR(dp);
1821        if (IS_ERR(dp))
1822                goto unlock;
1823
1824        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1825                                      info->snd_seq, OVS_DP_CMD_NEW);
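            /* If the reply cannot be allocated, report the lost notification
             * to the multicast listeners via netlink_set_err() instead of
             * failing the command.
             */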
1826        if (IS_ERR(reply)) {
1827                err = PTR_ERR(reply);
1828                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1829                                ovs_dp_datapath_multicast_group.id, err);
1830                err = 0;
1831                goto unlock;
1832        }
1833
1834        ovs_unlock();
1835        ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1836
1837        return 0;
1838unlock:
1839        ovs_unlock();
1840        return err;
1841}
1842
1843static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1844{
1845        struct sk_buff *reply;
1846        struct datapath *dp;
1847        int err;
1848
1849        ovs_lock();
1850        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1851        if (IS_ERR(dp)) {
1852                err = PTR_ERR(dp);
1853                goto unlock;
1854        }
1855
1856        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1857                                      info->snd_seq, OVS_DP_CMD_NEW);
1858        if (IS_ERR(reply)) {
1859                err = PTR_ERR(reply);
1860                goto unlock;
1861        }
1862
1863        ovs_unlock();
1864        return genlmsg_reply(reply, info);
1865
1866unlock:
1867        ovs_unlock();
1868        return err;
1869}
1870
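    /* Datapath dumps resume via cb->args[0], which counts how many
     * datapaths have already been written to earlier skbs.
     */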
1871static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1872{
1873        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1874        struct datapath *dp;
1875        int skip = cb->args[0];
1876        int i = 0;
1877
1878        rcu_read_lock();
1879        list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
1880                if (i >= skip &&
1881                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1882                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1883                                         OVS_DP_CMD_NEW) < 0)
1884                        break;
1885                i++;
1886        }
1887        rcu_read_unlock();
1888
1889        cb->args[0] = i;
1890
1891        return skb->len;
1892}
1893
1894static struct genl_ops dp_datapath_genl_ops[] = {
1895        { .cmd = OVS_DP_CMD_NEW,
1896          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1897          .policy = datapath_policy,
1898          .doit = ovs_dp_cmd_new
1899        },
1900        { .cmd = OVS_DP_CMD_DEL,
1901          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1902          .policy = datapath_policy,
1903          .doit = ovs_dp_cmd_del
1904        },
1905        { .cmd = OVS_DP_CMD_GET,
1906          .flags = 0,               /* OK for unprivileged users. */
1907          .policy = datapath_policy,
1908          .doit = ovs_dp_cmd_get,
1909          .dumpit = ovs_dp_cmd_dump
1910        },
1911        { .cmd = OVS_DP_CMD_SET,
1912          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1913          .policy = datapath_policy,
1914          .doit = ovs_dp_cmd_set,
1915        },
1916};
1917
1918static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1919        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1920        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1921        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1922        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1923        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1924        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1925};
1926
1927static struct genl_family dp_vport_genl_family = {
1928        .id = GENL_ID_GENERATE,
1929        .hdrsize = sizeof(struct ovs_header),
1930        .name = OVS_VPORT_FAMILY,
1931        .version = OVS_VPORT_VERSION,
1932        .maxattr = OVS_VPORT_ATTR_MAX,
1933        .netnsok = true,
1934        .parallel_ops = true,
1935};
1936
1937struct genl_multicast_group ovs_dp_vport_multicast_group = {
1938        .name = OVS_VPORT_MCGROUP
1939};
1940
1941/* Called with ovs_mutex or RCU read lock. */
1942static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1943                                   u32 portid, u32 seq, u32 flags, u8 cmd)
1944{
1945        struct ovs_header *ovs_header;
1946        struct ovs_vport_stats vport_stats;
1947        int err;
1948
1949        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1950                                 flags, cmd);
1951        if (!ovs_header)
1952                return -EMSGSIZE;
1953
1954        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1955
1956        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1957            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1958            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1959            nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
1960                goto nla_put_failure;
1961
1962        ovs_vport_get_stats(vport, &vport_stats);
1963        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1964                    &vport_stats))
1965                goto nla_put_failure;
1966
1967        err = ovs_vport_get_options(vport, skb);
1968        if (err == -EMSGSIZE)
1969                goto error;
1970
1971        return genlmsg_end(skb, ovs_header);
1972
1973nla_put_failure:
1974        err = -EMSGSIZE;
1975error:
1976        genlmsg_cancel(skb, ovs_header);
1977        return err;
1978}
1979
1980/* Called with ovs_mutex or RCU read lock. */
1981struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1982                                         u32 seq, u8 cmd)
1983{
1984        struct sk_buff *skb;
1985        int retval;
1986
1987        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1988        if (!skb)
1989                return ERR_PTR(-ENOMEM);
1990
1991        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1992        BUG_ON(retval < 0);
1993
1994        return skb;
1995}
1996
1997/* Called with ovs_mutex or RCU read lock. */
1998static struct vport *lookup_vport(struct net *net,
1999                                  struct ovs_header *ovs_header,
2000                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2001{
2002        struct datapath *dp;
2003        struct vport *vport;
2004
2005        if (a[OVS_VPORT_ATTR_NAME]) {
2006                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2007                if (!vport)
2008                        return ERR_PTR(-ENODEV);
2009                if (ovs_header->dp_ifindex &&
2010                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2011                        return ERR_PTR(-ENODEV);
2012                return vport;
2013        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2014                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2015
2016                if (port_no >= DP_MAX_PORTS)
2017                        return ERR_PTR(-EFBIG);
2018
2019                dp = get_dp(net, ovs_header->dp_ifindex);
2020                if (!dp)
2021                        return ERR_PTR(-ENODEV);
2022
2023                vport = ovs_vport_ovsl_rcu(dp, port_no);
2024                if (!vport)
2025                        return ERR_PTR(-ENODEV);
2026                return vport;
2027        } else
2028                return ERR_PTR(-EINVAL);
2029}
2030
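    /* OVS_VPORT_CMD_NEW: attach a port to an existing datapath.  If
     * userspace supplied OVS_VPORT_ATTR_PORT_NO, that slot is used (-EBUSY
     * if it is taken); otherwise the lowest free port number above
     * OVSP_LOCAL is chosen.
     */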
2031static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2032{
2033        struct nlattr **a = info->attrs;
2034        struct ovs_header *ovs_header = info->userhdr;
2035        struct vport_parms parms;
2036        struct sk_buff *reply;
2037        struct vport *vport;
2038        struct datapath *dp;
2039        u32 port_no;
2040        int err;
2041
2042        err = -EINVAL;
2043        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2044            !a[OVS_VPORT_ATTR_UPCALL_PID])
2045                goto exit;
2046
2047        ovs_lock();
2048        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2049        err = -ENODEV;
2050        if (!dp)
2051                goto exit_unlock;
2052
2053        if (a[OVS_VPORT_ATTR_PORT_NO]) {
2054                port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2055
2056                err = -EFBIG;
2057                if (port_no >= DP_MAX_PORTS)
2058                        goto exit_unlock;
2059
2060                vport = ovs_vport_ovsl(dp, port_no);
2061                err = -EBUSY;
2062                if (vport)
2063                        goto exit_unlock;
2064        } else {
2065                for (port_no = 1; ; port_no++) {
2066                        if (port_no >= DP_MAX_PORTS) {
2067                                err = -EFBIG;
2068                                goto exit_unlock;
2069                        }
2070                        vport = ovs_vport_ovsl(dp, port_no);
2071                        if (!vport)
2072                                break;
2073                }
2074        }
2075
2076        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2077        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2078        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2079        parms.dp = dp;
2080        parms.port_no = port_no;
2081        parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
2082
2083        vport = new_vport(&parms);
2084        err = PTR_ERR(vport);
2085        if (IS_ERR(vport))
2086                goto exit_unlock;
2087
2088        err = 0;
2089        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
2090                                         OVS_VPORT_CMD_NEW);
2091        if (IS_ERR(reply)) {
2092                err = PTR_ERR(reply);
2093                ovs_dp_detach_port(vport);
2094                goto exit_unlock;
2095        }
2096
2097        ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
2098
2099exit_unlock:
2100        ovs_unlock();
2101exit:
2102        return err;
2103}
2104
2105static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2106{
2107        struct nlattr **a = info->attrs;
2108        struct sk_buff *reply;
2109        struct vport *vport;
2110        int err;
2111
2112        ovs_lock();
2113        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2114        err = PTR_ERR(vport);
2115        if (IS_ERR(vport))
2116                goto exit_unlock;
2117
2118        if (a[OVS_VPORT_ATTR_TYPE] &&
2119            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2120                err = -EINVAL;
2121                goto exit_unlock;
2122        }
2123
2124        reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2125        if (!reply) {
2126                err = -ENOMEM;
2127                goto exit_unlock;
2128        }
2129
2130        if (a[OVS_VPORT_ATTR_OPTIONS]) {
2131                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2132                if (err)
2133                        goto exit_free;
2134        }
2135
2136        if (a[OVS_VPORT_ATTR_UPCALL_PID])
2137                vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
2138
2139        err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2140                                      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2141        BUG_ON(err < 0);
2142
2143        ovs_unlock();
2144        ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
2145        return 0;
2146
2147exit_free:
2148        kfree_skb(reply);
2149exit_unlock:
2150        ovs_unlock();
2151        return err;
2152}
2153
2154static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2155{
2156        struct nlattr **a = info->attrs;
2157        struct sk_buff *reply;
2158        struct vport *vport;
2159        int err;
2160
2161        ovs_lock();
2162        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2163        err = PTR_ERR(vport);
2164        if (IS_ERR(vport))
2165                goto exit_unlock;
2166
2167        if (vport->port_no == OVSP_LOCAL) {
2168                err = -EINVAL;
2169                goto exit_unlock;
2170        }
2171
2172        reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2173                                         info->snd_seq, OVS_VPORT_CMD_DEL);
2174        err = PTR_ERR(reply);
2175        if (IS_ERR(reply))
2176                goto exit_unlock;
2177
2178        err = 0;
2179        ovs_dp_detach_port(vport);
2180
2181        ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
2182
2183exit_unlock:
2184        ovs_unlock();
2185        return err;
2186}
2187
2188static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2189{
2190        struct nlattr **a = info->attrs;
2191        struct ovs_header *ovs_header = info->userhdr;
2192        struct sk_buff *reply;
2193        struct vport *vport;
2194        int err;
2195
2196        rcu_read_lock();
2197        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2198        err = PTR_ERR(vport);
2199        if (IS_ERR(vport))
2200                goto exit_unlock;
2201
2202        reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
2203                                         info->snd_seq, OVS_VPORT_CMD_NEW);
2204        err = PTR_ERR(reply);
2205        if (IS_ERR(reply))
2206                goto exit_unlock;
2207
2208        rcu_read_unlock();
2209
2210        return genlmsg_reply(reply, info);
2211
2212exit_unlock:
2213        rcu_read_unlock();
2214        return err;
2215}
2216
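    /* Vport dumps keep two resume cursors: cb->args[0] is the hash bucket
     * and cb->args[1] the number of ports already dumped from it.
     */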
2217static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2218{
2219        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2220        struct datapath *dp;
2221        int bucket = cb->args[0], skip = cb->args[1];
2222        int i, j = 0;
2223
2224        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2225        if (!dp)
2226                return -ENODEV;
2227
2228        rcu_read_lock();
2229        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2230                struct vport *vport;
2231
2232                j = 0;
2233                hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2234                        if (j >= skip &&
2235                            ovs_vport_cmd_fill_info(vport, skb,
2236                                                    NETLINK_CB(cb->skb).portid,
2237                                                    cb->nlh->nlmsg_seq,
2238                                                    NLM_F_MULTI,
2239                                                    OVS_VPORT_CMD_NEW) < 0)
2240                                goto out;
2241
2242                        j++;
2243                }
2244                skip = 0;
2245        }
2246out:
2247        rcu_read_unlock();
2248
2249        cb->args[0] = i;
2250        cb->args[1] = j;
2251
2252        return skb->len;
2253}
2254
2255static struct genl_ops dp_vport_genl_ops[] = {
2256        { .cmd = OVS_VPORT_CMD_NEW,
2257          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2258          .policy = vport_policy,
2259          .doit = ovs_vport_cmd_new
2260        },
2261        { .cmd = OVS_VPORT_CMD_DEL,
2262          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2263          .policy = vport_policy,
2264          .doit = ovs_vport_cmd_del
2265        },
2266        { .cmd = OVS_VPORT_CMD_GET,
2267          .flags = 0,               /* OK for unprivileged users. */
2268          .policy = vport_policy,
2269          .doit = ovs_vport_cmd_get,
2270          .dumpit = ovs_vport_cmd_dump
2271        },
2272        { .cmd = OVS_VPORT_CMD_SET,
2273          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2274          .policy = vport_policy,
2275          .doit = ovs_vport_cmd_set,
2276        },
2277};
2278
2279struct genl_family_and_ops {
2280        struct genl_family *family;
2281        struct genl_ops *ops;
2282        int n_ops;
2283        struct genl_multicast_group *group;
2284};
2285
2286static const struct genl_family_and_ops dp_genl_families[] = {
2287        { &dp_datapath_genl_family,
2288          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
2289          &ovs_dp_datapath_multicast_group },
2290        { &dp_vport_genl_family,
2291          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
2292          &ovs_dp_vport_multicast_group },
2293        { &dp_flow_genl_family,
2294          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
2295          &ovs_dp_flow_multicast_group },
2296        { &dp_packet_genl_family,
2297          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
2298          NULL },
2299};
2300
2301static void dp_unregister_genl(int n_families)
2302{
2303        int i;
2304
2305        for (i = 0; i < n_families; i++)
2306                genl_unregister_family(dp_genl_families[i].family);
2307}
2308
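    /* Register each generic netlink family above together with its
     * multicast group.  On failure, unregister everything registered so
     * far, giving dp_init() all-or-nothing behaviour.
     */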
2309static int dp_register_genl(void)
2310{
2311        int n_registered;
2312        int err;
2313        int i;
2314
2315        n_registered = 0;
2316        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2317                const struct genl_family_and_ops *f = &dp_genl_families[i];
2318
2319                err = genl_register_family_with_ops(f->family, f->ops,
2320                                                    f->n_ops);
2321                if (err)
2322                        goto error;
2323                n_registered++;
2324
2325                if (f->group) {
2326                        err = genl_register_mc_group(f->family, f->group);
2327                        if (err)
2328                                goto error;
2329                }
2330        }
2331
2332        return 0;
2333
2334error:
2335        dp_unregister_genl(n_registered);
2336        return err;
2337}
2338
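    /* Periodic worker: every REHASH_FLOW_INTERVAL, swap each datapath's
     * flow table for a rehashed copy under ovs_lock and free the old one
     * via deferred destruction, so RCU readers of the old table are not
     * disturbed.
     */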
2339static void rehash_flow_table(struct work_struct *work)
2340{
2341        struct datapath *dp;
2342        struct net *net;
2343
2344        ovs_lock();
2345        rtnl_lock();
2346        for_each_net(net) {
2347                struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2348
2349                list_for_each_entry(dp, &ovs_net->dps, list_node) {
2350                        struct flow_table *old_table = ovsl_dereference(dp->table);
2351                        struct flow_table *new_table;
2352
2353                        new_table = ovs_flow_tbl_rehash(old_table);
2354                        if (!IS_ERR(new_table)) {
2355                                rcu_assign_pointer(dp->table, new_table);
2356                                ovs_flow_tbl_destroy(old_table, true);
2357                        }
2358                }
2359        }
2360        rtnl_unlock();
2361        ovs_unlock();
2362        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2363}
2364
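    /* Per-network-namespace state: each namespace gets its own datapath
     * list and notifier work item (struct ovs_net, registered below with
     * ovs_net_ops).
     */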
2365static int __net_init ovs_init_net(struct net *net)
2366{
2367        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2368
2369        INIT_LIST_HEAD(&ovs_net->dps);
2370        INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2371        return 0;
2372}
2373
2374static void __net_exit ovs_exit_net(struct net *net)
2375{
2376        struct datapath *dp, *dp_next;
2377        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2378
2379        ovs_lock();
2380        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2381                __dp_destroy(dp);
2382        ovs_unlock();
2383
2384        cancel_work_sync(&ovs_net->dp_notify_work);
2385}
2386
2387static struct pernet_operations ovs_net_ops = {
2388        .init = ovs_init_net,
2389        .exit = ovs_exit_net,
2390        .id   = &ovs_net_id,
2391        .size = sizeof(struct ovs_net),
2392};
2393
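    /* Module init: bring up the flow and vport subsystems, per-netns
     * state, the netdev notifier and the generic netlink families, in
     * that order; the error labels unwind in reverse.
     */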
2394static int __init dp_init(void)
2395{
2396        int err;
2397
2398        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2399
2400        pr_info("Open vSwitch switching datapath\n");
2401
2402        err = ovs_flow_init();
2403        if (err)
2404                goto error;
2405
2406        err = ovs_vport_init();
2407        if (err)
2408                goto error_flow_exit;
2409
2410        err = register_pernet_device(&ovs_net_ops);
2411        if (err)
2412                goto error_vport_exit;
2413
2414        err = register_netdevice_notifier(&ovs_dp_device_notifier);
2415        if (err)
2416                goto error_netns_exit;
2417
2418        err = dp_register_genl();
2419        if (err < 0)
2420                goto error_unreg_notifier;
2421
2422        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2423
2424        return 0;
2425
2426error_unreg_notifier:
2427        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2428error_netns_exit:
2429        unregister_pernet_device(&ovs_net_ops);
2430error_vport_exit:
2431        ovs_vport_exit();
2432error_flow_exit:
2433        ovs_flow_exit();
2434error:
2435        return err;
2436}
2437
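    /* Module exit: the mirror image of dp_init().  rcu_barrier() waits
     * for outstanding destroy_dp_rcu() callbacks before the vport and
     * flow subsystems are torn down.
     */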
2438static void dp_cleanup(void)
2439{
2440        cancel_delayed_work_sync(&rehash_flow_wq);
2441        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2442        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2443        unregister_pernet_device(&ovs_net_ops);
2444        rcu_barrier();
2445        ovs_vport_exit();
2446        ovs_flow_exit();
2447}
2448
2449module_init(dp_init);
2450module_exit(dp_cleanup);
2451
2452MODULE_DESCRIPTION("Open vSwitch switching datapath");
2453MODULE_LICENSE("GPL");
2454