linux/net/openvswitch/actions.c
/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>

#include "datapath.h"
#include "vport.h"

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        const struct nlattr *attr, int len, bool keep_skb);

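/* Make the first 'write_len' bytes of the skb writable: a shared clone gets
 * its header reallocated so that in-place header edits are safe.
 */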
static int make_writable(struct sk_buff *skb, int write_len)
{
        if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
                return 0;

        return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}

/* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
        struct vlan_hdr *vhdr;
        int err;

        err = make_writable(skb, VLAN_ETH_HLEN);
        if (unlikely(err))
                return err;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_sub(skb->csum, csum_partial(skb->data
                                        + (2 * ETH_ALEN), VLAN_HLEN, 0));

        vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
        *current_tci = vhdr->h_vlan_TCI;

        memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
        __skb_pull(skb, VLAN_HLEN);

        vlan_set_encap_proto(skb, vhdr);
        skb->mac_header += VLAN_HLEN;
        skb_reset_mac_len(skb);

        return 0;
}

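/* Strip the outermost VLAN tag, whether it sits in the hardware acceleration
 * field or in the packet itself.  If another 802.1Q header follows, promote
 * it to the acceleration field.
 */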
static int pop_vlan(struct sk_buff *skb)
{
        __be16 tci;
        int err;

        if (likely(vlan_tx_tag_present(skb))) {
                skb->vlan_tci = 0;
        } else {
                if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
                             skb->len < VLAN_ETH_HLEN))
                        return 0;

                err = __pop_vlan_tci(skb, &tci);
                if (err)
                        return err;
        }
        /* move next vlan tag to hw accel tag */
        if (likely(skb->protocol != htons(ETH_P_8021Q) ||
                   skb->len < VLAN_ETH_HLEN))
                return 0;

        err = __pop_vlan_tci(skb, &tci);
        if (unlikely(err))
                return err;

        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
        return 0;
}

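/* Push a new VLAN tag.  If a tag is already held in the hardware acceleration
 * field, it is first written back into the packet data; the new TPID/TCI pair
 * then becomes the accelerated tag.
 */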
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
        if (unlikely(vlan_tx_tag_present(skb))) {
                u16 current_tag;

                /* push down current VLAN tag */
                current_tag = vlan_tx_tag_get(skb);

                if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
                        return -ENOMEM;

                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->csum = csum_add(skb->csum, csum_partial(skb->data
                                        + (2 * ETH_ALEN), VLAN_HLEN, 0));

        }
        __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
        return 0;
}

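/* Rewrite the Ethernet source and destination addresses, keeping any
 * CHECKSUM_COMPLETE value consistent.
 */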
static int set_eth_addr(struct sk_buff *skb,
                        const struct ovs_key_ethernet *eth_key)
{
        int err;
        err = make_writable(skb, ETH_HLEN);
        if (unlikely(err))
                return err;

        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

        memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
        memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);

        ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

        return 0;
}

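/* Rewrite one IPv4 address, fixing up the IP header checksum as well as the
 * TCP/UDP checksum, which covers the pseudo-header.
 */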
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
                                __be32 *addr, __be32 new_addr)
{
        int transport_len = skb->len - skb_transport_offset(skb);

        if (nh->protocol == IPPROTO_TCP) {
                if (likely(transport_len >= sizeof(struct tcphdr)))
                        inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
                                                 *addr, new_addr, 1);
        } else if (nh->protocol == IPPROTO_UDP) {
                if (likely(transport_len >= sizeof(struct udphdr))) {
                        struct udphdr *uh = udp_hdr(skb);

                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                                inet_proto_csum_replace4(&uh->check, skb,
                                                         *addr, new_addr, 1);
                                if (!uh->check)
                                        uh->check = CSUM_MANGLED_0;
                        }
                }
        }

        csum_replace4(&nh->check, *addr, new_addr);
        skb->rxhash = 0;
        *addr = new_addr;
}

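/* Adjust the TCP or UDP checksum for an IPv6 address change; both checksums
 * cover the IPv6 pseudo-header.
 */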
static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
                                 __be32 addr[4], const __be32 new_addr[4])
{
        int transport_len = skb->len - skb_transport_offset(skb);

        if (l4_proto == IPPROTO_TCP) {
                if (likely(transport_len >= sizeof(struct tcphdr)))
                        inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
                                                  addr, new_addr, 1);
        } else if (l4_proto == IPPROTO_UDP) {
                if (likely(transport_len >= sizeof(struct udphdr))) {
                        struct udphdr *uh = udp_hdr(skb);

                        if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                                inet_proto_csum_replace16(&uh->check, skb,
                                                          addr, new_addr, 1);
                                if (!uh->check)
                                        uh->check = CSUM_MANGLED_0;
                        }
                }
        }
}

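/* Rewrite one IPv6 address, optionally updating the transport checksum. */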
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
                          __be32 addr[4], const __be32 new_addr[4],
                          bool recalculate_csum)
{
        if (recalculate_csum)
                update_ipv6_checksum(skb, l4_proto, addr, new_addr);

        skb->rxhash = 0;
        memcpy(addr, new_addr, sizeof(__be32[4]));
}

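/* Set the IPv6 traffic class, which straddles the 'priority' field and the
 * top nibble of flow_lbl[0].
 */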
static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
{
        nh->priority = tc >> 4;
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
}

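/* Set the 20-bit IPv6 flow label, spread across flow_lbl[0..2]. */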
static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
{
        nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
        nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
        nh->flow_lbl[2] = fl & 0x000000FF;
}

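/* Set the IPv4 TTL and incrementally update the header checksum. */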
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
{
        csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
        nh->ttl = new_ttl;
}

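/* Apply an OVS_KEY_ATTR_IPV4 set action: rewrite the address, TOS and TTL
 * fields that differ from the current header.
 */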
static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
{
        struct iphdr *nh;
        int err;

        err = make_writable(skb, skb_network_offset(skb) +
                                 sizeof(struct iphdr));
        if (unlikely(err))
                return err;

        nh = ip_hdr(skb);

        if (ipv4_key->ipv4_src != nh->saddr)
                set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);

        if (ipv4_key->ipv4_dst != nh->daddr)
                set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);

        if (ipv4_key->ipv4_tos != nh->tos)
                ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);

        if (ipv4_key->ipv4_ttl != nh->ttl)
                set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);

        return 0;
}

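/* Apply an OVS_KEY_ATTR_IPV6 set action.  When a routing extension header is
 * present, the transport checksum is computed over the final destination
 * carried in that header, so rewriting the IPv6 destination address does not
 * require a checksum update.
 */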
static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
{
        struct ipv6hdr *nh;
        int err;
        __be32 *saddr;
        __be32 *daddr;

        err = make_writable(skb, skb_network_offset(skb) +
                            sizeof(struct ipv6hdr));
        if (unlikely(err))
                return err;

        nh = ipv6_hdr(skb);
        saddr = (__be32 *)&nh->saddr;
        daddr = (__be32 *)&nh->daddr;

        if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
                set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
                              ipv6_key->ipv6_src, true);

        if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
                unsigned int offset = 0;
                int flags = IP6_FH_F_SKIP_RH;
                bool recalc_csum = true;

                if (ipv6_ext_hdr(nh->nexthdr))
                        recalc_csum = ipv6_find_hdr(skb, &offset,
                                                    NEXTHDR_ROUTING, NULL,
                                                    &flags) != NEXTHDR_ROUTING;

                set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
                              ipv6_key->ipv6_dst, recalc_csum);
        }

        set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
        set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
        nh->hop_limit = ipv6_key->ipv6_hlimit;

        return 0;
}

/* Must follow make_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
                         __be16 new_port, __sum16 *check)
{
        inet_proto_csum_replace2(check, skb, *port, new_port, 0);
        *port = new_port;
        skb->rxhash = 0;
}

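/* Rewrite a UDP port.  A zero checksum means the datagram carries no
 * checksum, and with CHECKSUM_PARTIAL it will be computed later, so in both
 * cases only the port is changed; otherwise the checksum is updated and a
 * resulting zero is folded to CSUM_MANGLED_0.
 */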
static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
{
        struct udphdr *uh = udp_hdr(skb);

        if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
                set_tp_port(skb, port, new_port, &uh->check);

                if (!uh->check)
                        uh->check = CSUM_MANGLED_0;
        } else {
                *port = new_port;
                skb->rxhash = 0;
        }
}

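/* Apply an OVS_KEY_ATTR_UDP set action: rewrite the source and/or
 * destination port.
 */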
static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
{
        struct udphdr *uh;
        int err;

        err = make_writable(skb, skb_transport_offset(skb) +
                                 sizeof(struct udphdr));
        if (unlikely(err))
                return err;

        uh = udp_hdr(skb);
        if (udp_port_key->udp_src != uh->source)
                set_udp_port(skb, &uh->source, udp_port_key->udp_src);

        if (udp_port_key->udp_dst != uh->dest)
                set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);

        return 0;
}

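/* Apply an OVS_KEY_ATTR_TCP set action: rewrite the source and/or
 * destination port.
 */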
static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
{
        struct tcphdr *th;
        int err;

        err = make_writable(skb, skb_transport_offset(skb) +
                                 sizeof(struct tcphdr));
        if (unlikely(err))
                return err;

        th = tcp_hdr(skb);
        if (tcp_port_key->tcp_src != th->source)
                set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);

        if (tcp_port_key->tcp_dst != th->dest)
                set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);

        return 0;
}

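/* Transmit 'skb' on the datapath port numbered 'out_port'.  Consumes the
 * skb; it is freed if the port no longer exists.
 */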
static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
        struct vport *vport;

        if (unlikely(!skb))
                return -ENOMEM;

        vport = ovs_vport_rcu(dp, out_port);
        if (unlikely(!vport)) {
                kfree_skb(skb);
                return -ENODEV;
        }

        ovs_vport_send(vport, skb);
        return 0;
}

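/* Build an OVS_PACKET_CMD_ACTION upcall for 'skb', copying the optional
 * userdata and Netlink port ID out of the userspace action, and hand it to
 * ovs_dp_upcall().
 */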
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                            const struct nlattr *attr)
{
        struct dp_upcall_info upcall;
        const struct nlattr *a;
        int rem;

        upcall.cmd = OVS_PACKET_CMD_ACTION;
        upcall.key = &OVS_CB(skb)->flow->key;
        upcall.userdata = NULL;
        upcall.portid = 0;

        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
                 a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
                case OVS_USERSPACE_ATTR_USERDATA:
                        upcall.userdata = a;
                        break;

                case OVS_USERSPACE_ATTR_PID:
                        upcall.portid = nla_get_u32(a);
                        break;
                }
        }

        return ovs_dp_upcall(dp, skb, &upcall);
}

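/* With the probability carried in OVS_SAMPLE_ATTR_PROBABILITY, execute the
 * nested OVS_SAMPLE_ATTR_ACTIONS list; otherwise leave the packet alone.
 */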
static int sample(struct datapath *dp, struct sk_buff *skb,
                  const struct nlattr *attr)
{
        const struct nlattr *acts_list = NULL;
        const struct nlattr *a;
        int rem;

        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
                 a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
                case OVS_SAMPLE_ATTR_PROBABILITY:
                        if (net_random() >= nla_get_u32(a))
                                return 0;
                        break;

                case OVS_SAMPLE_ATTR_ACTIONS:
                        acts_list = a;
                        break;
                }
        }

        return do_execute_actions(dp, skb, nla_data(acts_list),
                                                 nla_len(acts_list), true);
}

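/* Dispatch an OVS_ACTION_ATTR_SET action to the matching field-rewrite
 * helper based on the nested key attribute type.
 */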
static int execute_set_action(struct sk_buff *skb,
                                 const struct nlattr *nested_attr)
{
        int err = 0;

        switch (nla_type(nested_attr)) {
        case OVS_KEY_ATTR_PRIORITY:
                skb->priority = nla_get_u32(nested_attr);
                break;

        case OVS_KEY_ATTR_SKB_MARK:
                skb->mark = nla_get_u32(nested_attr);
                break;

        case OVS_KEY_ATTR_IPV4_TUNNEL:
                OVS_CB(skb)->tun_key = nla_data(nested_attr);
                break;

        case OVS_KEY_ATTR_ETHERNET:
                err = set_eth_addr(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_IPV4:
                err = set_ipv4(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_IPV6:
                err = set_ipv6(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_TCP:
                err = set_tcp(skb, nla_data(nested_attr));
                break;

        case OVS_KEY_ATTR_UDP:
                err = set_udp(skb, nla_data(nested_attr));
                break;
        }

        return err;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        const struct nlattr *attr, int len, bool keep_skb)
{
        /* Every output action needs a separate clone of 'skb', but the common
         * case is just a single output action, so doing a clone and then
         * freeing the original skbuff would be wasteful.  The following code
         * is slightly obscure just to avoid that. */
        int prev_port = -1;
        const struct nlattr *a;
        int rem;

        for (a = attr, rem = len; rem > 0;
             a = nla_next(a, &rem)) {
                int err = 0;

                if (prev_port != -1) {
                        do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
                        prev_port = -1;
                }

                switch (nla_type(a)) {
                case OVS_ACTION_ATTR_OUTPUT:
                        prev_port = nla_get_u32(a);
                        break;

                case OVS_ACTION_ATTR_USERSPACE:
                        output_userspace(dp, skb, a);
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        err = push_vlan(skb, nla_data(a));
                        if (unlikely(err)) /* skb already freed. */
                                return err;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        err = pop_vlan(skb);
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = execute_set_action(skb, nla_data(a));
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = sample(dp, skb, a);
                        break;
                }

                if (unlikely(err)) {
                        kfree_skb(skb);
                        return err;
                }
        }

        if (prev_port != -1) {
                if (keep_skb)
                        skb = skb_clone(skb, GFP_ATOMIC);

                do_output(dp, skb, prev_port);
        } else if (!keep_skb)
                consume_skb(skb);

        return 0;
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
{
        struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

        OVS_CB(skb)->tun_key = NULL;
        return do_execute_actions(dp, skb, acts->actions,
                                         acts->actions_len, false);
}