linux/net/openvswitch/actions.c
   1/*
   2 * Copyright (c) 2007-2014 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include <linux/skbuff.h>
  22#include <linux/in.h>
  23#include <linux/ip.h>
  24#include <linux/openvswitch.h>
  25#include <linux/sctp.h>
  26#include <linux/tcp.h>
  27#include <linux/udp.h>
  28#include <linux/in6.h>
  29#include <linux/if_arp.h>
  30#include <linux/if_vlan.h>
  31
  32#include <net/ip.h>
  33#include <net/ipv6.h>
  34#include <net/checksum.h>
  35#include <net/dsfield.h>
  36#include <net/mpls.h>
  37#include <net/sctp/checksum.h>
  38
  39#include "datapath.h"
  40#include "flow.h"
  41#include "vport.h"
  42
  43static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
  44                              struct sw_flow_key *key,
  45                              const struct nlattr *attr, int len);
  46
  47struct deferred_action {
  48        struct sk_buff *skb;
  49        const struct nlattr *actions;
  50
  51        /* Store pkt_key clone when creating deferred action. */
  52        struct sw_flow_key pkt_key;
  53};
  54
  55#define DEFERRED_ACTION_FIFO_SIZE 10
  56struct action_fifo {
  57        int head;
  58        int tail;
  59        /* Deferred action fifo queue storage. */
  60        struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
  61};
  62
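     /* Recirc and sample actions do not recurse into do_execute_actions();
      * instead the skb and a clone of its flow key are queued on a per-CPU
      * action_fifo and replayed once the current action list has finished.
      * exec_actions_level tracks nesting so that only the outermost
      * ovs_execute_actions() call drains the FIFO.
      */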
  63static struct action_fifo __percpu *action_fifos;
  64static DEFINE_PER_CPU(int, exec_actions_level);
  65
  66static void action_fifo_init(struct action_fifo *fifo)
  67{
  68        fifo->head = 0;
  69        fifo->tail = 0;
  70}
  71
  72static bool action_fifo_is_empty(const struct action_fifo *fifo)
  73{
  74        return (fifo->head == fifo->tail);
  75}
  76
  77static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
  78{
  79        if (action_fifo_is_empty(fifo))
  80                return NULL;
  81
  82        return &fifo->fifo[fifo->tail++];
  83}
  84
  85static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
  86{
  87        if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
  88                return NULL;
  89
  90        return &fifo->fifo[fifo->head++];
  91}
  92
   93/* Return the new entry, or NULL if the FIFO is full. */
  94static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
  95                                                    const struct sw_flow_key *key,
  96                                                    const struct nlattr *attr)
  97{
  98        struct action_fifo *fifo;
  99        struct deferred_action *da;
 100
 101        fifo = this_cpu_ptr(action_fifos);
 102        da = action_fifo_put(fifo);
 103        if (da) {
 104                da->skb = skb;
 105                da->actions = attr;
 106                da->pkt_key = *key;
 107        }
 108
 109        return da;
 110}
 111
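     /* Actions that rewrite headers mark the flow key stale by zeroing
      * eth.type; the key is only re-extracted (via ovs_flow_key_update())
      * when a later action, such as recirculation, needs a valid key again.
      */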
 112static void invalidate_flow_key(struct sw_flow_key *key)
 113{
 114        key->eth.type = htons(0);
 115}
 116
 117static bool is_flow_key_valid(const struct sw_flow_key *key)
 118{
 119        return !!key->eth.type;
 120}
 121
 122static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 123                     const struct ovs_action_push_mpls *mpls)
 124{
 125        __be32 *new_mpls_lse;
 126        struct ethhdr *hdr;
 127
  128        /* The networking stack does not allow simultaneous tunnel and MPLS GSO. */
 129        if (skb->encapsulation)
 130                return -ENOTSUPP;
 131
 132        if (skb_cow_head(skb, MPLS_HLEN) < 0)
 133                return -ENOMEM;
 134
 135        skb_push(skb, MPLS_HLEN);
 136        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
 137                skb->mac_len);
 138        skb_reset_mac_header(skb);
 139
 140        new_mpls_lse = (__be32 *)skb_mpls_header(skb);
 141        *new_mpls_lse = mpls->mpls_lse;
 142
 143        if (skb->ip_summed == CHECKSUM_COMPLETE)
 144                skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
 145                                                             MPLS_HLEN, 0));
 146
 147        hdr = eth_hdr(skb);
 148        hdr->h_proto = mpls->mpls_ethertype;
 149
 150        if (!skb->inner_protocol)
 151                skb_set_inner_protocol(skb, skb->protocol);
 152        skb->protocol = mpls->mpls_ethertype;
 153
 154        invalidate_flow_key(key);
 155        return 0;
 156}
 157
 158static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 159                    const __be16 ethertype)
 160{
 161        struct ethhdr *hdr;
 162        int err;
 163
 164        err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
 165        if (unlikely(err))
 166                return err;
 167
 168        skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
 169
 170        memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
 171                skb->mac_len);
 172
 173        __skb_pull(skb, MPLS_HLEN);
 174        skb_reset_mac_header(skb);
 175
 176        /* skb_mpls_header() is used to locate the ethertype
 177         * field correctly in the presence of VLAN tags.
 178         */
 179        hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
 180        hdr->h_proto = ethertype;
 181        if (eth_p_mpls(skb->protocol))
 182                skb->protocol = ethertype;
 183
 184        invalidate_flow_key(key);
 185        return 0;
 186}
 187
 188/* 'KEY' must not have any bits set outside of the 'MASK' */
 189#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
 190#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
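     /* For example, with OLD = 0xabcd, KEY = 0x0042 and MASK = 0x00ff,
      * MASKED() yields 0x0042 | (0xabcd & 0xff00) = 0xab42: only the bits
      * covered by the mask are replaced, the rest keep their old values.
      */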
 191
 192static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
 193                    const __be32 *mpls_lse, const __be32 *mask)
 194{
 195        __be32 *stack;
 196        __be32 lse;
 197        int err;
 198
 199        err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
 200        if (unlikely(err))
 201                return err;
 202
 203        stack = (__be32 *)skb_mpls_header(skb);
 204        lse = MASKED(*stack, *mpls_lse, *mask);
 205        if (skb->ip_summed == CHECKSUM_COMPLETE) {
 206                __be32 diff[] = { ~(*stack), lse };
 207
 208                skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 209                                          ~skb->csum);
 210        }
 211
 212        *stack = lse;
 213        flow_key->mpls.top_lse = lse;
 214        return 0;
 215}
 216
 217static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 218{
 219        int err;
 220
 221        err = skb_vlan_pop(skb);
 222        if (skb_vlan_tag_present(skb))
 223                invalidate_flow_key(key);
 224        else
 225                key->eth.tci = 0;
 226        return err;
 227}
 228
 229static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 230                     const struct ovs_action_push_vlan *vlan)
 231{
 232        if (skb_vlan_tag_present(skb))
 233                invalidate_flow_key(key);
 234        else
 235                key->eth.tci = vlan->vlan_tci;
 236        return skb_vlan_push(skb, vlan->vlan_tpid,
 237                             ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 238}
 239
 240/* 'src' is already properly masked. */
 241static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
 242{
 243        u16 *dst = (u16 *)dst_;
 244        const u16 *src = (const u16 *)src_;
 245        const u16 *mask = (const u16 *)mask_;
 246
 247        SET_MASKED(dst[0], src[0], mask[0]);
 248        SET_MASKED(dst[1], src[1], mask[1]);
 249        SET_MASKED(dst[2], src[2], mask[2]);
 250}
 251
 252static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
 253                        const struct ovs_key_ethernet *key,
 254                        const struct ovs_key_ethernet *mask)
 255{
 256        int err;
 257
 258        err = skb_ensure_writable(skb, ETH_HLEN);
 259        if (unlikely(err))
 260                return err;
 261
 262        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 263
 264        ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
 265                               mask->eth_src);
 266        ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
 267                               mask->eth_dst);
 268
 269        ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 270
 271        ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
 272        ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
 273        return 0;
 274}
 275
 276static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 277                        __be32 *addr, __be32 new_addr)
 278{
 279        int transport_len = skb->len - skb_transport_offset(skb);
 280
 281        if (nh->protocol == IPPROTO_TCP) {
 282                if (likely(transport_len >= sizeof(struct tcphdr)))
 283                        inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
 284                                                 *addr, new_addr, 1);
 285        } else if (nh->protocol == IPPROTO_UDP) {
 286                if (likely(transport_len >= sizeof(struct udphdr))) {
 287                        struct udphdr *uh = udp_hdr(skb);
 288
 289                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 290                                inet_proto_csum_replace4(&uh->check, skb,
 291                                                         *addr, new_addr, 1);
 292                                if (!uh->check)
 293                                        uh->check = CSUM_MANGLED_0;
 294                        }
 295                }
 296        }
 297
 298        csum_replace4(&nh->check, *addr, new_addr);
 299        skb_clear_hash(skb);
 300        *addr = new_addr;
 301}
 302
 303static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
 304                                 __be32 addr[4], const __be32 new_addr[4])
 305{
 306        int transport_len = skb->len - skb_transport_offset(skb);
 307
 308        if (l4_proto == NEXTHDR_TCP) {
 309                if (likely(transport_len >= sizeof(struct tcphdr)))
 310                        inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
 311                                                  addr, new_addr, 1);
 312        } else if (l4_proto == NEXTHDR_UDP) {
 313                if (likely(transport_len >= sizeof(struct udphdr))) {
 314                        struct udphdr *uh = udp_hdr(skb);
 315
 316                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 317                                inet_proto_csum_replace16(&uh->check, skb,
 318                                                          addr, new_addr, 1);
 319                                if (!uh->check)
 320                                        uh->check = CSUM_MANGLED_0;
 321                        }
 322                }
 323        } else if (l4_proto == NEXTHDR_ICMP) {
 324                if (likely(transport_len >= sizeof(struct icmp6hdr)))
 325                        inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
 326                                                  skb, addr, new_addr, 1);
 327        }
 328}
 329
 330static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
 331                           const __be32 mask[4], __be32 masked[4])
 332{
 333        masked[0] = MASKED(old[0], addr[0], mask[0]);
 334        masked[1] = MASKED(old[1], addr[1], mask[1]);
 335        masked[2] = MASKED(old[2], addr[2], mask[2]);
 336        masked[3] = MASKED(old[3], addr[3], mask[3]);
 337}
 338
 339static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 340                          __be32 addr[4], const __be32 new_addr[4],
 341                          bool recalculate_csum)
 342{
 343        if (recalculate_csum)
 344                update_ipv6_checksum(skb, l4_proto, addr, new_addr);
 345
 346        skb_clear_hash(skb);
 347        memcpy(addr, new_addr, sizeof(__be32[4]));
 348}
 349
 350static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
 351{
 352        /* Bits 21-24 are always unmasked, so this retains their values. */
 353        SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
 354        SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
 355        SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
 356}
 357
 358static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
 359                       u8 mask)
 360{
 361        new_ttl = MASKED(nh->ttl, new_ttl, mask);
 362
 363        csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
 364        nh->ttl = new_ttl;
 365}
 366
 367static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
 368                    const struct ovs_key_ipv4 *key,
 369                    const struct ovs_key_ipv4 *mask)
 370{
 371        struct iphdr *nh;
 372        __be32 new_addr;
 373        int err;
 374
 375        err = skb_ensure_writable(skb, skb_network_offset(skb) +
 376                                  sizeof(struct iphdr));
 377        if (unlikely(err))
 378                return err;
 379
 380        nh = ip_hdr(skb);
 381
  382        /* Setting an IP address is typically only a side effect of
  383         * matching on it in the current userspace implementation, so it
  384         * makes sense to check whether the value actually changed.
  385         */
 386        if (mask->ipv4_src) {
 387                new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
 388
 389                if (unlikely(new_addr != nh->saddr)) {
 390                        set_ip_addr(skb, nh, &nh->saddr, new_addr);
 391                        flow_key->ipv4.addr.src = new_addr;
 392                }
 393        }
 394        if (mask->ipv4_dst) {
 395                new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
 396
 397                if (unlikely(new_addr != nh->daddr)) {
 398                        set_ip_addr(skb, nh, &nh->daddr, new_addr);
 399                        flow_key->ipv4.addr.dst = new_addr;
 400                }
 401        }
 402        if (mask->ipv4_tos) {
 403                ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
 404                flow_key->ip.tos = nh->tos;
 405        }
 406        if (mask->ipv4_ttl) {
 407                set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
 408                flow_key->ip.ttl = nh->ttl;
 409        }
 410
 411        return 0;
 412}
 413
 414static bool is_ipv6_mask_nonzero(const __be32 addr[4])
 415{
 416        return !!(addr[0] | addr[1] | addr[2] | addr[3]);
 417}
 418
 419static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 420                    const struct ovs_key_ipv6 *key,
 421                    const struct ovs_key_ipv6 *mask)
 422{
 423        struct ipv6hdr *nh;
 424        int err;
 425
 426        err = skb_ensure_writable(skb, skb_network_offset(skb) +
 427                                  sizeof(struct ipv6hdr));
 428        if (unlikely(err))
 429                return err;
 430
 431        nh = ipv6_hdr(skb);
 432
  433        /* Setting an IP address is typically only a side effect of
  434         * matching on it in the current userspace implementation, so it
  435         * makes sense to check whether the value actually changed.
  436         */
 437        if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
 438                __be32 *saddr = (__be32 *)&nh->saddr;
 439                __be32 masked[4];
 440
 441                mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
 442
 443                if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
 444                        set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
 445                                      true);
 446                        memcpy(&flow_key->ipv6.addr.src, masked,
 447                               sizeof(flow_key->ipv6.addr.src));
 448                }
 449        }
 450        if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
 451                unsigned int offset = 0;
 452                int flags = IP6_FH_F_SKIP_RH;
 453                bool recalc_csum = true;
 454                __be32 *daddr = (__be32 *)&nh->daddr;
 455                __be32 masked[4];
 456
 457                mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
 458
 459                if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
 460                        if (ipv6_ext_hdr(nh->nexthdr))
 461                                recalc_csum = (ipv6_find_hdr(skb, &offset,
 462                                                             NEXTHDR_ROUTING,
 463                                                             NULL, &flags)
 464                                               != NEXTHDR_ROUTING);
 465
 466                        set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
 467                                      recalc_csum);
 468                        memcpy(&flow_key->ipv6.addr.dst, masked,
 469                               sizeof(flow_key->ipv6.addr.dst));
 470                }
 471        }
 472        if (mask->ipv6_tclass) {
 473                ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
 474                flow_key->ip.tos = ipv6_get_dsfield(nh);
 475        }
 476        if (mask->ipv6_label) {
 477                set_ipv6_fl(nh, ntohl(key->ipv6_label),
 478                            ntohl(mask->ipv6_label));
 479                flow_key->ipv6.label =
 480                    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
 481        }
 482        if (mask->ipv6_hlimit) {
 483                SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
 484                flow_key->ip.ttl = nh->hop_limit;
 485        }
 486        return 0;
 487}
 488
 489/* Must follow skb_ensure_writable() since that can move the skb data. */
 490static void set_tp_port(struct sk_buff *skb, __be16 *port,
 491                        __be16 new_port, __sum16 *check)
 492{
 493        inet_proto_csum_replace2(check, skb, *port, new_port, 0);
 494        *port = new_port;
 495}
 496
 497static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 498                   const struct ovs_key_udp *key,
 499                   const struct ovs_key_udp *mask)
 500{
 501        struct udphdr *uh;
 502        __be16 src, dst;
 503        int err;
 504
 505        err = skb_ensure_writable(skb, skb_transport_offset(skb) +
 506                                  sizeof(struct udphdr));
 507        if (unlikely(err))
 508                return err;
 509
 510        uh = udp_hdr(skb);
  511        /* At least one of the masks is non-zero, so do not bother checking them individually. */
 512        src = MASKED(uh->source, key->udp_src, mask->udp_src);
 513        dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
 514
 515        if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
 516                if (likely(src != uh->source)) {
 517                        set_tp_port(skb, &uh->source, src, &uh->check);
 518                        flow_key->tp.src = src;
 519                }
 520                if (likely(dst != uh->dest)) {
 521                        set_tp_port(skb, &uh->dest, dst, &uh->check);
 522                        flow_key->tp.dst = dst;
 523                }
 524
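                     /* A UDP checksum of zero means "no checksum", so if the
                      * recomputed value happens to be zero it is sent as
                      * CSUM_MANGLED_0 (0xffff) instead.
                      */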
 525                if (unlikely(!uh->check))
 526                        uh->check = CSUM_MANGLED_0;
 527        } else {
 528                uh->source = src;
 529                uh->dest = dst;
 530                flow_key->tp.src = src;
 531                flow_key->tp.dst = dst;
 532        }
 533
 534        skb_clear_hash(skb);
 535
 536        return 0;
 537}
 538
 539static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 540                   const struct ovs_key_tcp *key,
 541                   const struct ovs_key_tcp *mask)
 542{
 543        struct tcphdr *th;
 544        __be16 src, dst;
 545        int err;
 546
 547        err = skb_ensure_writable(skb, skb_transport_offset(skb) +
 548                                  sizeof(struct tcphdr));
 549        if (unlikely(err))
 550                return err;
 551
 552        th = tcp_hdr(skb);
 553        src = MASKED(th->source, key->tcp_src, mask->tcp_src);
 554        if (likely(src != th->source)) {
 555                set_tp_port(skb, &th->source, src, &th->check);
 556                flow_key->tp.src = src;
 557        }
 558        dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
 559        if (likely(dst != th->dest)) {
 560                set_tp_port(skb, &th->dest, dst, &th->check);
 561                flow_key->tp.dst = dst;
 562        }
 563        skb_clear_hash(skb);
 564
 565        return 0;
 566}
 567
 568static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 569                    const struct ovs_key_sctp *key,
 570                    const struct ovs_key_sctp *mask)
 571{
 572        unsigned int sctphoff = skb_transport_offset(skb);
 573        struct sctphdr *sh;
 574        __le32 old_correct_csum, new_csum, old_csum;
 575        int err;
 576
 577        err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
 578        if (unlikely(err))
 579                return err;
 580
 581        sh = sctp_hdr(skb);
 582        old_csum = sh->checksum;
 583        old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 584
 585        sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
 586        sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
 587
 588        new_csum = sctp_compute_cksum(skb, sctphoff);
 589
 590        /* Carry any checksum errors through. */
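             /* (old_csum ^ old_correct_csum) is any error already present in
              * the received packet; folding it into new_csum preserves that
              * error instead of silently repairing it.
              */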
 591        sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
 592
 593        skb_clear_hash(skb);
 594        flow_key->tp.src = sh->source;
 595        flow_key->tp.dst = sh->dest;
 596
 597        return 0;
 598}
 599
 600static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 601{
 602        struct vport *vport = ovs_vport_rcu(dp, out_port);
 603
 604        if (likely(vport))
 605                ovs_vport_send(vport, skb);
 606        else
 607                kfree_skb(skb);
 608}
 609
 610static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 611                            struct sw_flow_key *key, const struct nlattr *attr)
 612{
 613        struct ovs_tunnel_info info;
 614        struct dp_upcall_info upcall;
 615        const struct nlattr *a;
 616        int rem;
 617
 618        upcall.cmd = OVS_PACKET_CMD_ACTION;
 619        upcall.userdata = NULL;
 620        upcall.portid = 0;
 621        upcall.egress_tun_info = NULL;
 622
 623        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 624                 a = nla_next(a, &rem)) {
 625                switch (nla_type(a)) {
 626                case OVS_USERSPACE_ATTR_USERDATA:
 627                        upcall.userdata = a;
 628                        break;
 629
 630                case OVS_USERSPACE_ATTR_PID:
 631                        upcall.portid = nla_get_u32(a);
 632                        break;
 633
 634                case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
 635                        /* Get out tunnel info. */
 636                        struct vport *vport;
 637
 638                        vport = ovs_vport_rcu(dp, nla_get_u32(a));
 639                        if (vport) {
 640                                int err;
 641
 642                                err = ovs_vport_get_egress_tun_info(vport, skb,
 643                                                                    &info);
 644                                if (!err)
 645                                        upcall.egress_tun_info = &info;
 646                        }
 647                        break;
 648                }
 649
 650                } /* End of switch. */
 651        }
 652
 653        return ovs_dp_upcall(dp, skb, key, &upcall);
 654}
 655
 656static int sample(struct datapath *dp, struct sk_buff *skb,
 657                  struct sw_flow_key *key, const struct nlattr *attr)
 658{
 659        const struct nlattr *acts_list = NULL;
 660        const struct nlattr *a;
 661        int rem;
 662
 663        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 664                 a = nla_next(a, &rem)) {
 665                switch (nla_type(a)) {
 666                case OVS_SAMPLE_ATTR_PROBABILITY:
 667                        if (prandom_u32() >= nla_get_u32(a))
 668                                return 0;
 669                        break;
 670
 671                case OVS_SAMPLE_ATTR_ACTIONS:
 672                        acts_list = a;
 673                        break;
 674                }
 675        }
 676
 677        rem = nla_len(acts_list);
 678        a = nla_data(acts_list);
 679
 680        /* Actions list is empty, do nothing */
 681        if (unlikely(!rem))
 682                return 0;
 683
  684        /* The only known usage of the sample action is having a single
  685         * userspace action as its nested action list; treat that usage as a
  686         * special case. output_userspace() clones the skb it sends to
  687         * userspace, and the original skb is consumed by our caller.
  688         */
 689        if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
 690                   nla_is_last(a, rem)))
 691                return output_userspace(dp, skb, key, a);
 692
 693        skb = skb_clone(skb, GFP_ATOMIC);
 694        if (!skb)
 695                /* Skip the sample action when out of memory. */
 696                return 0;
 697
 698        if (!add_deferred_actions(skb, key, a)) {
 699                if (net_ratelimit())
 700                        pr_warn("%s: deferred actions limit reached, dropping sample action\n",
 701                                ovs_dp_name(dp));
 702
 703                kfree_skb(skb);
 704        }
 705        return 0;
 706}
 707
 708static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 709                         const struct nlattr *attr)
 710{
 711        struct ovs_action_hash *hash_act = nla_data(attr);
 712        u32 hash = 0;
 713
 714        /* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
 715        hash = skb_get_hash(skb);
 716        hash = jhash_1word(hash, hash_act->hash_basis);
 717        if (!hash)
 718                hash = 0x1;
 719
 720        key->ovs_flow_hash = hash;
 721}
 722
 723static int execute_set_action(struct sk_buff *skb,
 724                              struct sw_flow_key *flow_key,
 725                              const struct nlattr *a)
 726{
 727        /* Only tunnel set execution is supported without a mask. */
 728        if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
 729                OVS_CB(skb)->egress_tun_info = nla_data(a);
 730                return 0;
 731        }
 732
 733        return -EINVAL;
 734}
 735
 736/* Mask is at the midpoint of the data. */
 737#define get_mask(a, type) ((const type)nla_data(a) + 1)
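     /* A masked set attribute carries the key structure immediately followed
      * by an equally sized mask, so "+ 1" on the cast pointer lands on the
      * mask half of the attribute payload.
      */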
 738
 739static int execute_masked_set_action(struct sk_buff *skb,
 740                                     struct sw_flow_key *flow_key,
 741                                     const struct nlattr *a)
 742{
 743        int err = 0;
 744
 745        switch (nla_type(a)) {
 746        case OVS_KEY_ATTR_PRIORITY:
 747                SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
 748                flow_key->phy.priority = skb->priority;
 749                break;
 750
 751        case OVS_KEY_ATTR_SKB_MARK:
 752                SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
 753                flow_key->phy.skb_mark = skb->mark;
 754                break;
 755
 756        case OVS_KEY_ATTR_TUNNEL_INFO:
 757                /* Masked data not supported for tunnel. */
 758                err = -EINVAL;
 759                break;
 760
 761        case OVS_KEY_ATTR_ETHERNET:
 762                err = set_eth_addr(skb, flow_key, nla_data(a),
 763                                   get_mask(a, struct ovs_key_ethernet *));
 764                break;
 765
 766        case OVS_KEY_ATTR_IPV4:
 767                err = set_ipv4(skb, flow_key, nla_data(a),
 768                               get_mask(a, struct ovs_key_ipv4 *));
 769                break;
 770
 771        case OVS_KEY_ATTR_IPV6:
 772                err = set_ipv6(skb, flow_key, nla_data(a),
 773                               get_mask(a, struct ovs_key_ipv6 *));
 774                break;
 775
 776        case OVS_KEY_ATTR_TCP:
 777                err = set_tcp(skb, flow_key, nla_data(a),
 778                              get_mask(a, struct ovs_key_tcp *));
 779                break;
 780
 781        case OVS_KEY_ATTR_UDP:
 782                err = set_udp(skb, flow_key, nla_data(a),
 783                              get_mask(a, struct ovs_key_udp *));
 784                break;
 785
 786        case OVS_KEY_ATTR_SCTP:
 787                err = set_sctp(skb, flow_key, nla_data(a),
 788                               get_mask(a, struct ovs_key_sctp *));
 789                break;
 790
 791        case OVS_KEY_ATTR_MPLS:
 792                err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
 793                                                                    __be32 *));
 794                break;
 795        }
 796
 797        return err;
 798}
 799
 800static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
 801                          struct sw_flow_key *key,
 802                          const struct nlattr *a, int rem)
 803{
 804        struct deferred_action *da;
 805
 806        if (!is_flow_key_valid(key)) {
 807                int err;
 808
 809                err = ovs_flow_key_update(skb, key);
 810                if (err)
 811                        return err;
 812        }
 813        BUG_ON(!is_flow_key_valid(key));
 814
 815        if (!nla_is_last(a, rem)) {
  816         /* The recirc action is not the last action
  817          * in the action list, so we need to clone the skb.
 818                 */
 819                skb = skb_clone(skb, GFP_ATOMIC);
 820
 821                /* Skip the recirc action when out of memory, but
 822                 * continue on with the rest of the action list.
 823                 */
 824                if (!skb)
 825                        return 0;
 826        }
 827
 828        da = add_deferred_actions(skb, key, NULL);
 829        if (da) {
 830                da->pkt_key.recirc_id = nla_get_u32(a);
 831        } else {
 832                kfree_skb(skb);
 833
 834                if (net_ratelimit())
  835                        pr_warn("%s: deferred actions limit reached, dropping recirc action\n",
 836                                ovs_dp_name(dp));
 837        }
 838
 839        return 0;
 840}
 841
 842/* Execute a list of actions against 'skb'. */
 843static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 844                              struct sw_flow_key *key,
 845                              const struct nlattr *attr, int len)
 846{
 847        /* Every output action needs a separate clone of 'skb', but the common
  848         * case is just a single output action, so doing a clone and
 849         * then freeing the original skbuff is wasteful.  So the following code
 850         * is slightly obscure just to avoid that.
 851         */
 852        int prev_port = -1;
 853        const struct nlattr *a;
 854        int rem;
 855
 856        for (a = attr, rem = len; rem > 0;
 857             a = nla_next(a, &rem)) {
 858                int err = 0;
 859
 860                if (unlikely(prev_port != -1)) {
 861                        struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
 862
 863                        if (out_skb)
 864                                do_output(dp, out_skb, prev_port);
 865
 866                        prev_port = -1;
 867                }
 868
 869                switch (nla_type(a)) {
 870                case OVS_ACTION_ATTR_OUTPUT:
 871                        prev_port = nla_get_u32(a);
 872                        break;
 873
 874                case OVS_ACTION_ATTR_USERSPACE:
 875                        output_userspace(dp, skb, key, a);
 876                        break;
 877
 878                case OVS_ACTION_ATTR_HASH:
 879                        execute_hash(skb, key, a);
 880                        break;
 881
 882                case OVS_ACTION_ATTR_PUSH_MPLS:
 883                        err = push_mpls(skb, key, nla_data(a));
 884                        break;
 885
 886                case OVS_ACTION_ATTR_POP_MPLS:
 887                        err = pop_mpls(skb, key, nla_get_be16(a));
 888                        break;
 889
 890                case OVS_ACTION_ATTR_PUSH_VLAN:
 891                        err = push_vlan(skb, key, nla_data(a));
 892                        break;
 893
 894                case OVS_ACTION_ATTR_POP_VLAN:
 895                        err = pop_vlan(skb, key);
 896                        break;
 897
 898                case OVS_ACTION_ATTR_RECIRC:
 899                        err = execute_recirc(dp, skb, key, a, rem);
 900                        if (nla_is_last(a, rem)) {
 901                                /* If this is the last action, the skb has
 902                                 * been consumed or freed.
 903                                 * Return immediately.
 904                                 */
 905                                return err;
 906                        }
 907                        break;
 908
 909                case OVS_ACTION_ATTR_SET:
 910                        err = execute_set_action(skb, key, nla_data(a));
 911                        break;
 912
 913                case OVS_ACTION_ATTR_SET_MASKED:
 914                case OVS_ACTION_ATTR_SET_TO_MASKED:
 915                        err = execute_masked_set_action(skb, key, nla_data(a));
 916                        break;
 917
 918                case OVS_ACTION_ATTR_SAMPLE:
 919                        err = sample(dp, skb, key, a);
 920                        break;
 921                }
 922
 923                if (unlikely(err)) {
 924                        kfree_skb(skb);
 925                        return err;
 926                }
 927        }
 928
 929        if (prev_port != -1)
 930                do_output(dp, skb, prev_port);
 931        else
 932                consume_skb(skb);
 933
 934        return 0;
 935}
 936
 937static void process_deferred_actions(struct datapath *dp)
 938{
 939        struct action_fifo *fifo = this_cpu_ptr(action_fifos);
 940
  941        /* Do not touch the FIFO if there are no deferred actions. */
 942        if (action_fifo_is_empty(fifo))
 943                return;
 944
  945        /* Finish executing all deferred actions. */
 946        do {
 947                struct deferred_action *da = action_fifo_get(fifo);
 948                struct sk_buff *skb = da->skb;
 949                struct sw_flow_key *key = &da->pkt_key;
 950                const struct nlattr *actions = da->actions;
 951
 952                if (actions)
 953                        do_execute_actions(dp, skb, key, actions,
 954                                           nla_len(actions));
 955                else
 956                        ovs_dp_process_packet(skb, key);
 957        } while (!action_fifo_is_empty(fifo));
 958
 959        /* Reset FIFO for the next packet.  */
 960        action_fifo_init(fifo);
 961}
 962
 963/* Execute a list of actions against 'skb'. */
 964int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 965                        const struct sw_flow_actions *acts,
 966                        struct sw_flow_key *key)
 967{
 968        int level = this_cpu_read(exec_actions_level);
 969        int err;
 970
 971        this_cpu_inc(exec_actions_level);
 972        OVS_CB(skb)->egress_tun_info = NULL;
 973        err = do_execute_actions(dp, skb, key,
 974                                 acts->actions, acts->actions_len);
 975
 976        if (!level)
 977                process_deferred_actions(dp);
 978
 979        this_cpu_dec(exec_actions_level);
 980        return err;
 981}
 982
 983int action_fifos_init(void)
 984{
 985        action_fifos = alloc_percpu(struct action_fifo);
 986        if (!action_fifos)
 987                return -ENOMEM;
 988
 989        return 0;
 990}
 991
 992void action_fifos_exit(void)
 993{
 994        free_percpu(action_fifos);
 995}
 996