linux/net/openvswitch/actions.c
   1/*
   2 * Copyright (c) 2007-2014 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include <linux/skbuff.h>
  22#include <linux/in.h>
  23#include <linux/ip.h>
  24#include <linux/openvswitch.h>
  25#include <linux/sctp.h>
  26#include <linux/tcp.h>
  27#include <linux/udp.h>
  28#include <linux/in6.h>
  29#include <linux/if_arp.h>
  30#include <linux/if_vlan.h>
  31
  32#include <net/ip.h>
  33#include <net/ipv6.h>
  34#include <net/checksum.h>
  35#include <net/dsfield.h>
  36#include <net/mpls.h>
  37#include <net/sctp/checksum.h>
  38
  39#include "datapath.h"
  40#include "flow.h"
  41#include "vport.h"
  42
  43static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
  44                              struct sw_flow_key *key,
  45                              const struct nlattr *attr, int len);
  46
  47struct deferred_action {
  48        struct sk_buff *skb;
  49        const struct nlattr *actions;
  50
  51        /* Store pkt_key clone when creating deferred action. */
  52        struct sw_flow_key pkt_key;
  53};
  54
  55#define DEFERRED_ACTION_FIFO_SIZE 10
  56struct action_fifo {
  57        int head;
  58        int tail;
  59        /* Deferred action fifo queue storage. */
  60        struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
  61};
  62
  63static struct action_fifo __percpu *action_fifos;
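    /* Per-CPU nesting depth of ovs_execute_actions(); deferred actions are
     * flushed only when the outermost invocation on this CPU returns.
     */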
  64static DEFINE_PER_CPU(int, exec_actions_level);
  65
  66static void action_fifo_init(struct action_fifo *fifo)
  67{
  68        fifo->head = 0;
  69        fifo->tail = 0;
  70}
  71
  72static bool action_fifo_is_empty(const struct action_fifo *fifo)
  73{
  74        return (fifo->head == fifo->tail);
  75}
  76
  77static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
  78{
  79        if (action_fifo_is_empty(fifo))
  80                return NULL;
  81
  82        return &fifo->fifo[fifo->tail++];
  83}
  84
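    /* Reserve the next free slot at 'head', or return NULL if the fifo is full. */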
  85static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
  86{
  87        if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
  88                return NULL;
  89
  90        return &fifo->fifo[fifo->head++];
  91}
  92
  93/* Return the deferred action entry, or NULL if the fifo is full. */
  94static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
  95                                                    const struct sw_flow_key *key,
  96                                                    const struct nlattr *attr)
  97{
  98        struct action_fifo *fifo;
  99        struct deferred_action *da;
 100
 101        fifo = this_cpu_ptr(action_fifos);
 102        da = action_fifo_put(fifo);
 103        if (da) {
 104                da->skb = skb;
 105                da->actions = attr;
 106                da->pkt_key = *key;
 107        }
 108
 109        return da;
 110}
 111
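    /* Zeroing eth.type marks the cached flow key as stale; execute_recirc()
     * re-extracts the key via ovs_flow_key_update() before recirculating.
     */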
 112static void invalidate_flow_key(struct sw_flow_key *key)
 113{
 114        key->eth.type = htons(0);
 115}
 116
 117static bool is_flow_key_valid(const struct sw_flow_key *key)
 118{
 119        return !!key->eth.type;
 120}
 121
 122static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 123                     const struct ovs_action_push_mpls *mpls)
 124{
 125        __be32 *new_mpls_lse;
 126        struct ethhdr *hdr;
 127
 128        /* The networking stack does not allow simultaneous Tunnel and MPLS GSO. */
 129        if (skb->encapsulation)
 130                return -ENOTSUPP;
 131
 132        if (skb_cow_head(skb, MPLS_HLEN) < 0)
 133                return -ENOMEM;
 134
 135        skb_push(skb, MPLS_HLEN);
 136        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
 137                skb->mac_len);
 138        skb_reset_mac_header(skb);
 139
 140        new_mpls_lse = (__be32 *)skb_mpls_header(skb);
 141        *new_mpls_lse = mpls->mpls_lse;
 142
 143        if (skb->ip_summed == CHECKSUM_COMPLETE)
 144                skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
 145                                                             MPLS_HLEN, 0));
 146
 147        hdr = eth_hdr(skb);
 148        hdr->h_proto = mpls->mpls_ethertype;
 149
 150        if (!skb->inner_protocol)
 151                skb_set_inner_protocol(skb, skb->protocol);
 152        skb->protocol = mpls->mpls_ethertype;
 153
 154        invalidate_flow_key(key);
 155        return 0;
 156}
 157
 158static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 159                    const __be16 ethertype)
 160{
 161        struct ethhdr *hdr;
 162        int err;
 163
 164        err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
 165        if (unlikely(err))
 166                return err;
 167
 168        skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
 169
 170        memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
 171                skb->mac_len);
 172
 173        __skb_pull(skb, MPLS_HLEN);
 174        skb_reset_mac_header(skb);
 175
 176        /* skb_mpls_header() is used to locate the ethertype
 177         * field correctly in the presence of VLAN tags.
 178         */
 179        hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
 180        hdr->h_proto = ethertype;
 181        if (eth_p_mpls(skb->protocol))
 182                skb->protocol = ethertype;
 183
 184        invalidate_flow_key(key);
 185        return 0;
 186}
 187
 188/* 'KEY' must not have any bits set outside of the 'MASK' */
 189#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
 190#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
 191
 192static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
 193                    const __be32 *mpls_lse, const __be32 *mask)
 194{
 195        __be32 *stack;
 196        __be32 lse;
 197        int err;
 198
 199        err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
 200        if (unlikely(err))
 201                return err;
 202
 203        stack = (__be32 *)skb_mpls_header(skb);
 204        lse = MASKED(*stack, *mpls_lse, *mask);
 205        if (skb->ip_summed == CHECKSUM_COMPLETE) {
 206                __be32 diff[] = { ~(*stack), lse };
 207
 208                skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 209                                          ~skb->csum);
 210        }
 211
 212        *stack = lse;
 213        flow_key->mpls.top_lse = lse;
 214        return 0;
 215}
 216
 217static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 218{
 219        int err;
 220
 221        err = skb_vlan_pop(skb);
 222        if (skb_vlan_tag_present(skb))
 223                invalidate_flow_key(key);
 224        else
 225                key->eth.tci = 0;
 226        return err;
 227}
 228
 229static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 230                     const struct ovs_action_push_vlan *vlan)
 231{
 232        if (skb_vlan_tag_present(skb))
 233                invalidate_flow_key(key);
 234        else
 235                key->eth.tci = vlan->vlan_tci;
 236        return skb_vlan_push(skb, vlan->vlan_tpid,
 237                             ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 238}
 239
 240/* 'src' is already properly masked. */
 241static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
 242{
 243        u16 *dst = (u16 *)dst_;
 244        const u16 *src = (const u16 *)src_;
 245        const u16 *mask = (const u16 *)mask_;
 246
 247        SET_MASKED(dst[0], src[0], mask[0]);
 248        SET_MASKED(dst[1], src[1], mask[1]);
 249        SET_MASKED(dst[2], src[2], mask[2]);
 250}
 251
 252static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
 253                        const struct ovs_key_ethernet *key,
 254                        const struct ovs_key_ethernet *mask)
 255{
 256        int err;
 257
 258        err = skb_ensure_writable(skb, ETH_HLEN);
 259        if (unlikely(err))
 260                return err;
 261
 262        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 263
 264        ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
 265                               mask->eth_src);
 266        ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
 267                               mask->eth_dst);
 268
 269        ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 270
 271        ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
 272        ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
 273        return 0;
 274}
 275
 276static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
 277                                  __be32 addr, __be32 new_addr)
 278{
 279        int transport_len = skb->len - skb_transport_offset(skb);
 280
 281        if (nh->frag_off & htons(IP_OFFSET))
 282                return;
 283
 284        if (nh->protocol == IPPROTO_TCP) {
 285                if (likely(transport_len >= sizeof(struct tcphdr)))
 286                        inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
 287                                                 addr, new_addr, 1);
 288        } else if (nh->protocol == IPPROTO_UDP) {
 289                if (likely(transport_len >= sizeof(struct udphdr))) {
 290                        struct udphdr *uh = udp_hdr(skb);
 291
 292                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 293                                inet_proto_csum_replace4(&uh->check, skb,
 294                                                         addr, new_addr, 1);
 295                                if (!uh->check)
 296                                        uh->check = CSUM_MANGLED_0;
 297                        }
 298                }
 299        }
 300}
 301
 302static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 303                        __be32 *addr, __be32 new_addr)
 304{
 305        update_ip_l4_checksum(skb, nh, *addr, new_addr);
 306        csum_replace4(&nh->check, *addr, new_addr);
 307        skb_clear_hash(skb);
 308        *addr = new_addr;
 309}
 310
 311static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
 312                                 __be32 addr[4], const __be32 new_addr[4])
 313{
 314        int transport_len = skb->len - skb_transport_offset(skb);
 315
 316        if (l4_proto == NEXTHDR_TCP) {
 317                if (likely(transport_len >= sizeof(struct tcphdr)))
 318                        inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
 319                                                  addr, new_addr, 1);
 320        } else if (l4_proto == NEXTHDR_UDP) {
 321                if (likely(transport_len >= sizeof(struct udphdr))) {
 322                        struct udphdr *uh = udp_hdr(skb);
 323
 324                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 325                                inet_proto_csum_replace16(&uh->check, skb,
 326                                                          addr, new_addr, 1);
 327                                if (!uh->check)
 328                                        uh->check = CSUM_MANGLED_0;
 329                        }
 330                }
 331        } else if (l4_proto == NEXTHDR_ICMP) {
 332                if (likely(transport_len >= sizeof(struct icmp6hdr)))
 333                        inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
 334                                                  skb, addr, new_addr, 1);
 335        }
 336}
 337
 338static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
 339                           const __be32 mask[4], __be32 masked[4])
 340{
 341        masked[0] = MASKED(old[0], addr[0], mask[0]);
 342        masked[1] = MASKED(old[1], addr[1], mask[1]);
 343        masked[2] = MASKED(old[2], addr[2], mask[2]);
 344        masked[3] = MASKED(old[3], addr[3], mask[3]);
 345}
 346
 347static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 348                          __be32 addr[4], const __be32 new_addr[4],
 349                          bool recalculate_csum)
 350{
 351        if (recalculate_csum)
 352                update_ipv6_checksum(skb, l4_proto, addr, new_addr);
 353
 354        skb_clear_hash(skb);
 355        memcpy(addr, new_addr, sizeof(__be32[4]));
 356}
 357
 358static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
 359{
 360        /* Bits 21-24 are always unmasked, so this retains their values. */
 361        SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
 362        SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
 363        SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
 364}
 365
 366static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
 367                       u8 mask)
 368{
 369        new_ttl = MASKED(nh->ttl, new_ttl, mask);
 370
 371        csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
 372        nh->ttl = new_ttl;
 373}
 374
 375static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
 376                    const struct ovs_key_ipv4 *key,
 377                    const struct ovs_key_ipv4 *mask)
 378{
 379        struct iphdr *nh;
 380        __be32 new_addr;
 381        int err;
 382
 383        err = skb_ensure_writable(skb, skb_network_offset(skb) +
 384                                  sizeof(struct iphdr));
 385        if (unlikely(err))
 386                return err;
 387
 388        nh = ip_hdr(skb);
 389
 390        /* Setting IP addresses is typically only a side effect of
 391         * matching on them in the current userspace implementation, so it
 392         * makes sense to check if the value actually changed.
 393         */
 394        if (mask->ipv4_src) {
 395                new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
 396
 397                if (unlikely(new_addr != nh->saddr)) {
 398                        set_ip_addr(skb, nh, &nh->saddr, new_addr);
 399                        flow_key->ipv4.addr.src = new_addr;
 400                }
 401        }
 402        if (mask->ipv4_dst) {
 403                new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
 404
 405                if (unlikely(new_addr != nh->daddr)) {
 406                        set_ip_addr(skb, nh, &nh->daddr, new_addr);
 407                        flow_key->ipv4.addr.dst = new_addr;
 408                }
 409        }
 410        if (mask->ipv4_tos) {
 411                ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
 412                flow_key->ip.tos = nh->tos;
 413        }
 414        if (mask->ipv4_ttl) {
 415                set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
 416                flow_key->ip.ttl = nh->ttl;
 417        }
 418
 419        return 0;
 420}
 421
 422static bool is_ipv6_mask_nonzero(const __be32 addr[4])
 423{
 424        return !!(addr[0] | addr[1] | addr[2] | addr[3]);
 425}
 426
 427static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 428                    const struct ovs_key_ipv6 *key,
 429                    const struct ovs_key_ipv6 *mask)
 430{
 431        struct ipv6hdr *nh;
 432        int err;
 433
 434        err = skb_ensure_writable(skb, skb_network_offset(skb) +
 435                                  sizeof(struct ipv6hdr));
 436        if (unlikely(err))
 437                return err;
 438
 439        nh = ipv6_hdr(skb);
 440
 441        /* Setting IP addresses is typically only a side effect of
 442         * matching on them in the current userspace implementation, so it
 443         * makes sense to check if the value actually changed.
 444         */
 445        if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
 446                __be32 *saddr = (__be32 *)&nh->saddr;
 447                __be32 masked[4];
 448
 449                mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
 450
 451                if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
 452                        set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
 453                                      true);
 454                        memcpy(&flow_key->ipv6.addr.src, masked,
 455                               sizeof(flow_key->ipv6.addr.src));
 456                }
 457        }
 458        if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
 459                unsigned int offset = 0;
 460                int flags = IP6_FH_F_SKIP_RH;
 461                bool recalc_csum = true;
 462                __be32 *daddr = (__be32 *)&nh->daddr;
 463                __be32 masked[4];
 464
 465                mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
 466
 467                if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
 468                        if (ipv6_ext_hdr(nh->nexthdr))
 469                                recalc_csum = (ipv6_find_hdr(skb, &offset,
 470                                                             NEXTHDR_ROUTING,
 471                                                             NULL, &flags)
 472                                               != NEXTHDR_ROUTING);
 473
 474                        set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
 475                                      recalc_csum);
 476                        memcpy(&flow_key->ipv6.addr.dst, masked,
 477                               sizeof(flow_key->ipv6.addr.dst));
 478                }
 479        }
 480        if (mask->ipv6_tclass) {
 481                ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
 482                flow_key->ip.tos = ipv6_get_dsfield(nh);
 483        }
 484        if (mask->ipv6_label) {
 485                set_ipv6_fl(nh, ntohl(key->ipv6_label),
 486                            ntohl(mask->ipv6_label));
 487                flow_key->ipv6.label =
 488                    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
 489        }
 490        if (mask->ipv6_hlimit) {
 491                SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
 492                flow_key->ip.ttl = nh->hop_limit;
 493        }
 494        return 0;
 495}
 496
 497/* Must follow skb_ensure_writable() since that can move the skb data. */
 498static void set_tp_port(struct sk_buff *skb, __be16 *port,
 499                        __be16 new_port, __sum16 *check)
 500{
 501        inet_proto_csum_replace2(check, skb, *port, new_port, 0);
 502        *port = new_port;
 503}
 504
 505static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 506                   const struct ovs_key_udp *key,
 507                   const struct ovs_key_udp *mask)
 508{
 509        struct udphdr *uh;
 510        __be16 src, dst;
 511        int err;
 512
 513        err = skb_ensure_writable(skb, skb_transport_offset(skb) +
 514                                  sizeof(struct udphdr));
 515        if (unlikely(err))
 516                return err;
 517
 518        uh = udp_hdr(skb);
 519        /* At least one of the masks is non-zero, so do not bother checking them. */
 520        src = MASKED(uh->source, key->udp_src, mask->udp_src);
 521        dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
 522
 523        if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
 524                if (likely(src != uh->source)) {
 525                        set_tp_port(skb, &uh->source, src, &uh->check);
 526                        flow_key->tp.src = src;
 527                }
 528                if (likely(dst != uh->dest)) {
 529                        set_tp_port(skb, &uh->dest, dst, &uh->check);
 530                        flow_key->tp.dst = dst;
 531                }
 532
 533                if (unlikely(!uh->check))
 534                        uh->check = CSUM_MANGLED_0;
 535        } else {
 536                uh->source = src;
 537                uh->dest = dst;
 538                flow_key->tp.src = src;
 539                flow_key->tp.dst = dst;
 540        }
 541
 542        skb_clear_hash(skb);
 543
 544        return 0;
 545}
 546
 547static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 548                   const struct ovs_key_tcp *key,
 549                   const struct ovs_key_tcp *mask)
 550{
 551        struct tcphdr *th;
 552        __be16 src, dst;
 553        int err;
 554
 555        err = skb_ensure_writable(skb, skb_transport_offset(skb) +
 556                                  sizeof(struct tcphdr));
 557        if (unlikely(err))
 558                return err;
 559
 560        th = tcp_hdr(skb);
 561        src = MASKED(th->source, key->tcp_src, mask->tcp_src);
 562        if (likely(src != th->source)) {
 563                set_tp_port(skb, &th->source, src, &th->check);
 564                flow_key->tp.src = src;
 565        }
 566        dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
 567        if (likely(dst != th->dest)) {
 568                set_tp_port(skb, &th->dest, dst, &th->check);
 569                flow_key->tp.dst = dst;
 570        }
 571        skb_clear_hash(skb);
 572
 573        return 0;
 574}
 575
 576static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 577                    const struct ovs_key_sctp *key,
 578                    const struct ovs_key_sctp *mask)
 579{
 580        unsigned int sctphoff = skb_transport_offset(skb);
 581        struct sctphdr *sh;
 582        __le32 old_correct_csum, new_csum, old_csum;
 583        int err;
 584
 585        err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
 586        if (unlikely(err))
 587                return err;
 588
 589        sh = sctp_hdr(skb);
 590        old_csum = sh->checksum;
 591        old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 592
 593        sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
 594        sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
 595
 596        new_csum = sctp_compute_cksum(skb, sctphoff);
 597
 598        /* Carry any checksum errors through. */
 599        sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
 600
 601        skb_clear_hash(skb);
 602        flow_key->tp.src = sh->source;
 603        flow_key->tp.dst = sh->dest;
 604
 605        return 0;
 606}
 607
 608static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 609{
 610        struct vport *vport = ovs_vport_rcu(dp, out_port);
 611
 612        if (likely(vport))
 613                ovs_vport_send(vport, skb);
 614        else
 615                kfree_skb(skb);
 616}
 617
 618static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 619                            struct sw_flow_key *key, const struct nlattr *attr,
 620                            const struct nlattr *actions, int actions_len)
 621{
 622        struct ovs_tunnel_info info;
 623        struct dp_upcall_info upcall;
 624        const struct nlattr *a;
 625        int rem;
 626
 627        memset(&upcall, 0, sizeof(upcall));
 628        upcall.cmd = OVS_PACKET_CMD_ACTION;
 629
 630        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 631                 a = nla_next(a, &rem)) {
 632                switch (nla_type(a)) {
 633                case OVS_USERSPACE_ATTR_USERDATA:
 634                        upcall.userdata = a;
 635                        break;
 636
 637                case OVS_USERSPACE_ATTR_PID:
 638                        upcall.portid = nla_get_u32(a);
 639                        break;
 640
 641                case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
 642                        /* Get egress tunnel info. */
 643                        struct vport *vport;
 644
 645                        vport = ovs_vport_rcu(dp, nla_get_u32(a));
 646                        if (vport) {
 647                                int err;
 648
 649                                err = ovs_vport_get_egress_tun_info(vport, skb,
 650                                                                    &info);
 651                                if (!err)
 652                                        upcall.egress_tun_info = &info;
 653                        }
 654                        break;
 655                }
 656
 657                case OVS_USERSPACE_ATTR_ACTIONS: {
 658                        /* Include actions. */
 659                        upcall.actions = actions;
 660                        upcall.actions_len = actions_len;
 661                        break;
 662                }
 663
 664                } /* End of switch. */
 665        }
 666
 667        return ovs_dp_upcall(dp, skb, key, &upcall);
 668}
 669
 670static int sample(struct datapath *dp, struct sk_buff *skb,
 671                  struct sw_flow_key *key, const struct nlattr *attr,
 672                  const struct nlattr *actions, int actions_len)
 673{
 674        const struct nlattr *acts_list = NULL;
 675        const struct nlattr *a;
 676        int rem;
 677
 678        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 679                 a = nla_next(a, &rem)) {
 680                switch (nla_type(a)) {
 681                case OVS_SAMPLE_ATTR_PROBABILITY:
 682                        if (prandom_u32() >= nla_get_u32(a))
 683                                return 0;
 684                        break;
 685
 686                case OVS_SAMPLE_ATTR_ACTIONS:
 687                        acts_list = a;
 688                        break;
 689                }
 690        }
 691
 692        rem = nla_len(acts_list);
 693        a = nla_data(acts_list);
 694
 695        /* Actions list is empty, do nothing */
 696        if (unlikely(!rem))
 697                return 0;
 698
 699        /* The only known usage of the sample action is having a single
 700         * user-space action, so treat this usage as a special case.
 701         * output_userspace() should clone the skb to be sent to
 702         * user space; this skb will be consumed by its caller.
 703         */
 704        if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
 705                   nla_is_last(a, rem)))
 706                return output_userspace(dp, skb, key, a, actions, actions_len);
 707
 708        skb = skb_clone(skb, GFP_ATOMIC);
 709        if (!skb)
 710                /* Skip the sample action when out of memory. */
 711                return 0;
 712
 713        if (!add_deferred_actions(skb, key, a)) {
 714                if (net_ratelimit())
 715                        pr_warn("%s: deferred actions limit reached, dropping sample action\n",
 716                                ovs_dp_name(dp));
 717
 718                kfree_skb(skb);
 719        }
 720        return 0;
 721}
 722
 723static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 724                         const struct nlattr *attr)
 725{
 726        struct ovs_action_hash *hash_act = nla_data(attr);
 727        u32 hash = 0;
 728
 729        /* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
 730        hash = skb_get_hash(skb);
 731        hash = jhash_1word(hash, hash_act->hash_basis);
 732        if (!hash)
 733                hash = 0x1;
 734
 735        key->ovs_flow_hash = hash;
 736}
 737
 738static int execute_set_action(struct sk_buff *skb,
 739                              struct sw_flow_key *flow_key,
 740                              const struct nlattr *a)
 741{
 742        /* Only tunnel set execution is supported without a mask. */
 743        if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
 744                OVS_CB(skb)->egress_tun_info = nla_data(a);
 745                return 0;
 746        }
 747
 748        return -EINVAL;
 749}
 750
 751/* Mask is at the midpoint of the data. */
 752#define get_mask(a, type) ((const type)nla_data(a) + 1)
 753
 754static int execute_masked_set_action(struct sk_buff *skb,
 755                                     struct sw_flow_key *flow_key,
 756                                     const struct nlattr *a)
 757{
 758        int err = 0;
 759
 760        switch (nla_type(a)) {
 761        case OVS_KEY_ATTR_PRIORITY:
 762                SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
 763                flow_key->phy.priority = skb->priority;
 764                break;
 765
 766        case OVS_KEY_ATTR_SKB_MARK:
 767                SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
 768                flow_key->phy.skb_mark = skb->mark;
 769                break;
 770
 771        case OVS_KEY_ATTR_TUNNEL_INFO:
 772                /* Masked data not supported for tunnel. */
 773                err = -EINVAL;
 774                break;
 775
 776        case OVS_KEY_ATTR_ETHERNET:
 777                err = set_eth_addr(skb, flow_key, nla_data(a),
 778                                   get_mask(a, struct ovs_key_ethernet *));
 779                break;
 780
 781        case OVS_KEY_ATTR_IPV4:
 782                err = set_ipv4(skb, flow_key, nla_data(a),
 783                               get_mask(a, struct ovs_key_ipv4 *));
 784                break;
 785
 786        case OVS_KEY_ATTR_IPV6:
 787                err = set_ipv6(skb, flow_key, nla_data(a),
 788                               get_mask(a, struct ovs_key_ipv6 *));
 789                break;
 790
 791        case OVS_KEY_ATTR_TCP:
 792                err = set_tcp(skb, flow_key, nla_data(a),
 793                              get_mask(a, struct ovs_key_tcp *));
 794                break;
 795
 796        case OVS_KEY_ATTR_UDP:
 797                err = set_udp(skb, flow_key, nla_data(a),
 798                              get_mask(a, struct ovs_key_udp *));
 799                break;
 800
 801        case OVS_KEY_ATTR_SCTP:
 802                err = set_sctp(skb, flow_key, nla_data(a),
 803                               get_mask(a, struct ovs_key_sctp *));
 804                break;
 805
 806        case OVS_KEY_ATTR_MPLS:
 807                err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
 808                                                                    __be32 *));
 809                break;
 810        }
 811
 812        return err;
 813}
 814
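    /* Queue the packet for recirculation through the per-CPU deferred action
     * fifo; it is re-processed by ovs_dp_process_packet() with the new
     * recirc_id after the current action list finishes.
     */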
 815static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
 816                          struct sw_flow_key *key,
 817                          const struct nlattr *a, int rem)
 818{
 819        struct deferred_action *da;
 820
 821        if (!is_flow_key_valid(key)) {
 822                int err;
 823
 824                err = ovs_flow_key_update(skb, key);
 825                if (err)
 826                        return err;
 827        }
 828        BUG_ON(!is_flow_key_valid(key));
 829
 830        if (!nla_is_last(a, rem)) {
 831                /* The recirc action is not the last action
 832                 * in the action list, so the skb needs to be cloned.
 833                 */
 834                skb = skb_clone(skb, GFP_ATOMIC);
 835
 836                /* Skip the recirc action when out of memory, but
 837                 * continue on with the rest of the action list.
 838                 */
 839                if (!skb)
 840                        return 0;
 841        }
 842
 843        da = add_deferred_actions(skb, key, NULL);
 844        if (da) {
 845                da->pkt_key.recirc_id = nla_get_u32(a);
 846        } else {
 847                kfree_skb(skb);
 848
 849                if (net_ratelimit())
 850                        pr_warn("%s: deferred action limit reached, drop recirc action\n",
 851                                ovs_dp_name(dp));
 852        }
 853
 854        return 0;
 855}
 856
 857/* Execute a list of actions against 'skb'. */
 858static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 859                              struct sw_flow_key *key,
 860                              const struct nlattr *attr, int len)
 861{
 862        /* Every output action needs a separate clone of 'skb', but the common
 863         * case is just a single output action, so that doing a clone and
 864         * then freeing the original skbuff is wasteful.  So the following code
 865         * is slightly obscure just to avoid that.
 866         */
 867        int prev_port = -1;
 868        const struct nlattr *a;
 869        int rem;
 870
 871        for (a = attr, rem = len; rem > 0;
 872             a = nla_next(a, &rem)) {
 873                int err = 0;
 874
 875                if (unlikely(prev_port != -1)) {
 876                        struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
 877
 878                        if (out_skb)
 879                                do_output(dp, out_skb, prev_port);
 880
 881                        prev_port = -1;
 882                }
 883
 884                switch (nla_type(a)) {
 885                case OVS_ACTION_ATTR_OUTPUT:
 886                        prev_port = nla_get_u32(a);
 887                        break;
 888
 889                case OVS_ACTION_ATTR_USERSPACE:
 890                        output_userspace(dp, skb, key, a, attr, len);
 891                        break;
 892
 893                case OVS_ACTION_ATTR_HASH:
 894                        execute_hash(skb, key, a);
 895                        break;
 896
 897                case OVS_ACTION_ATTR_PUSH_MPLS:
 898                        err = push_mpls(skb, key, nla_data(a));
 899                        break;
 900
 901                case OVS_ACTION_ATTR_POP_MPLS:
 902                        err = pop_mpls(skb, key, nla_get_be16(a));
 903                        break;
 904
 905                case OVS_ACTION_ATTR_PUSH_VLAN:
 906                        err = push_vlan(skb, key, nla_data(a));
 907                        break;
 908
 909                case OVS_ACTION_ATTR_POP_VLAN:
 910                        err = pop_vlan(skb, key);
 911                        break;
 912
 913                case OVS_ACTION_ATTR_RECIRC:
 914                        err = execute_recirc(dp, skb, key, a, rem);
 915                        if (nla_is_last(a, rem)) {
 916                                /* If this is the last action, the skb has
 917                                 * been consumed or freed.
 918                                 * Return immediately.
 919                                 */
 920                                return err;
 921                        }
 922                        break;
 923
 924                case OVS_ACTION_ATTR_SET:
 925                        err = execute_set_action(skb, key, nla_data(a));
 926                        break;
 927
 928                case OVS_ACTION_ATTR_SET_MASKED:
 929                case OVS_ACTION_ATTR_SET_TO_MASKED:
 930                        err = execute_masked_set_action(skb, key, nla_data(a));
 931                        break;
 932
 933                case OVS_ACTION_ATTR_SAMPLE:
 934                        err = sample(dp, skb, key, a, attr, len);
 935                        break;
 936                }
 937
 938                if (unlikely(err)) {
 939                        kfree_skb(skb);
 940                        return err;
 941                }
 942        }
 943
 944        if (prev_port != -1)
 945                do_output(dp, skb, prev_port);
 946        else
 947                consume_skb(skb);
 948
 949        return 0;
 950}
 951
 952static void process_deferred_actions(struct datapath *dp)
 953{
 954        struct action_fifo *fifo = this_cpu_ptr(action_fifos);
 955
 956        /* Do not touch the FIFO if there are no deferred actions. */
 957        if (action_fifo_is_empty(fifo))
 958                return;
 959
 960        /* Finish executing all deferred actions. */
 961        do {
 962                struct deferred_action *da = action_fifo_get(fifo);
 963                struct sk_buff *skb = da->skb;
 964                struct sw_flow_key *key = &da->pkt_key;
 965                const struct nlattr *actions = da->actions;
 966
 967                if (actions)
 968                        do_execute_actions(dp, skb, key, actions,
 969                                           nla_len(actions));
 970                else
 971                        ovs_dp_process_packet(skb, key);
 972        } while (!action_fifo_is_empty(fifo));
 973
 974        /* Reset FIFO for the next packet.  */
 975        action_fifo_init(fifo);
 976}
 977
 978/* Execute a list of actions against 'skb'. */
 979int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 980                        const struct sw_flow_actions *acts,
 981                        struct sw_flow_key *key)
 982{
 983        int level = this_cpu_read(exec_actions_level);
 984        int err;
 985
 986        this_cpu_inc(exec_actions_level);
 987        OVS_CB(skb)->egress_tun_info = NULL;
 988        err = do_execute_actions(dp, skb, key,
 989                                 acts->actions, acts->actions_len);
 990
 991        if (!level)
 992                process_deferred_actions(dp);
 993
 994        this_cpu_dec(exec_actions_level);
 995        return err;
 996}
 997
 998int action_fifos_init(void)
 999{
1000        action_fifos = alloc_percpu(struct action_fifo);
1001        if (!action_fifos)
1002                return -ENOMEM;
1003
1004        return 0;
1005}
1006
1007void action_fifos_exit(void)
1008{
1009        free_percpu(action_fifos);
1010}
1011