linux/net/openvswitch/flow_netlink.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2007-2013 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include "flow.h"
  22#include "datapath.h"
  23#include <linux/uaccess.h>
  24#include <linux/netdevice.h>
  25#include <linux/etherdevice.h>
  26#include <linux/if_ether.h>
  27#include <linux/if_vlan.h>
  28#include <net/llc_pdu.h>
  29#include <linux/kernel.h>
  30#include <linux/jhash.h>
  31#include <linux/jiffies.h>
  32#include <linux/llc.h>
  33#include <linux/module.h>
  34#include <linux/in.h>
  35#include <linux/rcupdate.h>
  36#include <linux/if_arp.h>
  37#include <linux/ip.h>
  38#include <linux/ipv6.h>
  39#include <linux/sctp.h>
  40#include <linux/tcp.h>
  41#include <linux/udp.h>
  42#include <linux/icmp.h>
  43#include <linux/icmpv6.h>
  44#include <linux/rculist.h>
  45#include <net/ip.h>
  46#include <net/ipv6.h>
  47#include <net/ndisc.h>
  48
  49#include "flow_netlink.h"
  50
  51static void update_range__(struct sw_flow_match *match,
  52                           size_t offset, size_t size, bool is_mask)
  53{
  54        struct sw_flow_key_range *range = NULL;
  55        size_t start = rounddown(offset, sizeof(long));
  56        size_t end = roundup(offset + size, sizeof(long));
  57
  58        if (!is_mask)
  59                range = &match->range;
  60        else if (match->mask)
  61                range = &match->mask->range;
  62
  63        if (!range)
  64                return;
  65
  66        if (range->start == range->end) {
  67                range->start = start;
  68                range->end = end;
  69                return;
  70        }
  71
  72        if (range->start > start)
  73                range->start = start;
  74
  75        if (range->end < end)
  76                range->end = end;
  77}
  78
  79#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
  80        do { \
  81                update_range__(match, offsetof(struct sw_flow_key, field),  \
  82                                     sizeof((match)->key->field), is_mask); \
  83                if (is_mask) {                                              \
  84                        if ((match)->mask)                                  \
  85                                (match)->mask->key.field = value;           \
  86                } else {                                                    \
  87                        (match)->key->field = value;                        \
  88                }                                                           \
  89        } while (0)
  90
  91#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
  92        do { \
  93                update_range__(match, offsetof(struct sw_flow_key, field),  \
  94                                len, is_mask);                              \
  95                if (is_mask) {                                              \
  96                        if ((match)->mask)                                  \
  97                                memcpy(&(match)->mask->key.field, value_p, len);\
  98                } else {                                                    \
  99                        memcpy(&(match)->key->field, value_p, len);         \
 100                }                                                           \
 101        } while (0)
 102
 103static u16 range_n_bytes(const struct sw_flow_key_range *range)
 104{
 105        return range->end - range->start;
 106}
 107
 108static bool match_validate(const struct sw_flow_match *match,
 109                           u64 key_attrs, u64 mask_attrs)
 110{
 111        u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
 112        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 113
 114        /* The following mask attributes allowed only if they
 115         * pass the validation tests. */
 116        mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
 117                        | (1 << OVS_KEY_ATTR_IPV6)
 118                        | (1 << OVS_KEY_ATTR_TCP)
 119                        | (1 << OVS_KEY_ATTR_TCP_FLAGS)
 120                        | (1 << OVS_KEY_ATTR_UDP)
 121                        | (1 << OVS_KEY_ATTR_SCTP)
 122                        | (1 << OVS_KEY_ATTR_ICMP)
 123                        | (1 << OVS_KEY_ATTR_ICMPV6)
 124                        | (1 << OVS_KEY_ATTR_ARP)
 125                        | (1 << OVS_KEY_ATTR_ND));
 126
 127        /* Always allowed mask fields. */
 128        mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
 129                       | (1 << OVS_KEY_ATTR_IN_PORT)
 130                       | (1 << OVS_KEY_ATTR_ETHERTYPE));
 131
 132        /* Check key attributes. */
 133        if (match->key->eth.type == htons(ETH_P_ARP)
 134                        || match->key->eth.type == htons(ETH_P_RARP)) {
 135                key_expected |= 1 << OVS_KEY_ATTR_ARP;
 136                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 137                        mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
 138        }
 139
 140        if (match->key->eth.type == htons(ETH_P_IP)) {
 141                key_expected |= 1 << OVS_KEY_ATTR_IPV4;
 142                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 143                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
 144
 145                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 146                        if (match->key->ip.proto == IPPROTO_UDP) {
 147                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 148                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 149                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 150                        }
 151
 152                        if (match->key->ip.proto == IPPROTO_SCTP) {
 153                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 154                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 155                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 156                        }
 157
 158                        if (match->key->ip.proto == IPPROTO_TCP) {
 159                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 160                                key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 161                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
 162                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 163                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 164                                }
 165                        }
 166
 167                        if (match->key->ip.proto == IPPROTO_ICMP) {
 168                                key_expected |= 1 << OVS_KEY_ATTR_ICMP;
 169                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 170                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
 171                        }
 172                }
 173        }
 174
 175        if (match->key->eth.type == htons(ETH_P_IPV6)) {
 176                key_expected |= 1 << OVS_KEY_ATTR_IPV6;
 177                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 178                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
 179
 180                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 181                        if (match->key->ip.proto == IPPROTO_UDP) {
 182                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 183                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 184                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 185                        }
 186
 187                        if (match->key->ip.proto == IPPROTO_SCTP) {
 188                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 189                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 190                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 191                        }
 192
 193                        if (match->key->ip.proto == IPPROTO_TCP) {
 194                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 195                                key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 196                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
 197                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 198                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 199                                }
 200                        }
 201
 202                        if (match->key->ip.proto == IPPROTO_ICMPV6) {
 203                                key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
 204                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 205                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
 206
 207                                if (match->key->tp.src ==
 208                                                htons(NDISC_NEIGHBOUR_SOLICITATION) ||
 209                                    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 210                                        key_expected |= 1 << OVS_KEY_ATTR_ND;
 211                                        if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
 212                                                mask_allowed |= 1 << OVS_KEY_ATTR_ND;
 213                                }
 214                        }
 215                }
 216        }
 217
 218        if ((key_attrs & key_expected) != key_expected) {
 219                /* Key attributes check failed. */
 220                OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
 221                                (unsigned long long)key_attrs, (unsigned long long)key_expected);
 222                return false;
 223        }
 224
 225        if ((mask_attrs & mask_allowed) != mask_attrs) {
 226                /* Mask attributes check failed. */
 227                OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
 228                                (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
 229                return false;
 230        }
 231
 232        return true;
 233}
 234
 235/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 236static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 237        [OVS_KEY_ATTR_ENCAP] = -1,
 238        [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
 239        [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
 240        [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
 241        [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
 242        [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
 243        [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
 244        [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
 245        [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
 246        [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
 247        [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
 248        [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
 249        [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
 250        [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
 251        [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
 252        [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
 253        [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
 254        [OVS_KEY_ATTR_TUNNEL] = -1,
 255};
 256
 257static bool is_all_zero(const u8 *fp, size_t size)
 258{
 259        int i;
 260
 261        if (!fp)
 262                return false;
 263
 264        for (i = 0; i < size; i++)
 265                if (fp[i])
 266                        return false;
 267
 268        return true;
 269}
 270
 271static int __parse_flow_nlattrs(const struct nlattr *attr,
 272                                const struct nlattr *a[],
 273                                u64 *attrsp, bool nz)
 274{
 275        const struct nlattr *nla;
 276        u64 attrs;
 277        int rem;
 278
 279        attrs = *attrsp;
 280        nla_for_each_nested(nla, attr, rem) {
 281                u16 type = nla_type(nla);
 282                int expected_len;
 283
 284                if (type > OVS_KEY_ATTR_MAX) {
 285                        OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
 286                                  type, OVS_KEY_ATTR_MAX);
 287                        return -EINVAL;
 288                }
 289
 290                if (attrs & (1 << type)) {
 291                        OVS_NLERR("Duplicate key attribute (type %d).\n", type);
 292                        return -EINVAL;
 293                }
 294
 295                expected_len = ovs_key_lens[type];
 296                if (nla_len(nla) != expected_len && expected_len != -1) {
 297                        OVS_NLERR("Key attribute has unexpected length (type=%d"
 298                                  ", length=%d, expected=%d).\n", type,
 299                                  nla_len(nla), expected_len);
 300                        return -EINVAL;
 301                }
 302
 303                if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
 304                        attrs |= 1 << type;
 305                        a[type] = nla;
 306                }
 307        }
 308        if (rem) {
 309                OVS_NLERR("Message has %d unknown bytes.\n", rem);
 310                return -EINVAL;
 311        }
 312
 313        *attrsp = attrs;
 314        return 0;
 315}
 316
 317static int parse_flow_mask_nlattrs(const struct nlattr *attr,
 318                                   const struct nlattr *a[], u64 *attrsp)
 319{
 320        return __parse_flow_nlattrs(attr, a, attrsp, true);
 321}
 322
 323static int parse_flow_nlattrs(const struct nlattr *attr,
 324                              const struct nlattr *a[], u64 *attrsp)
 325{
 326        return __parse_flow_nlattrs(attr, a, attrsp, false);
 327}
 328
 329static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 330                                struct sw_flow_match *match, bool is_mask)
 331{
 332        struct nlattr *a;
 333        int rem;
 334        bool ttl = false;
 335        __be16 tun_flags = 0;
 336
 337        nla_for_each_nested(a, attr, rem) {
 338                int type = nla_type(a);
 339                static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
 340                        [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
 341                        [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
 342                        [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
 343                        [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
 344                        [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
 345                        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
 346                        [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
 347                };
 348
 349                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
 350                        OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
 351                        type, OVS_TUNNEL_KEY_ATTR_MAX);
 352                        return -EINVAL;
 353                }
 354
 355                if (ovs_tunnel_key_lens[type] != nla_len(a)) {
 356                        OVS_NLERR("IPv4 tunnel attribute type has unexpected "
 357                                  " length (type=%d, length=%d, expected=%d).\n",
 358                                  type, nla_len(a), ovs_tunnel_key_lens[type]);
 359                        return -EINVAL;
 360                }
 361
 362                switch (type) {
 363                case OVS_TUNNEL_KEY_ATTR_ID:
 364                        SW_FLOW_KEY_PUT(match, tun_key.tun_id,
 365                                        nla_get_be64(a), is_mask);
 366                        tun_flags |= TUNNEL_KEY;
 367                        break;
 368                case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
 369                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
 370                                        nla_get_be32(a), is_mask);
 371                        break;
 372                case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
 373                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
 374                                        nla_get_be32(a), is_mask);
 375                        break;
 376                case OVS_TUNNEL_KEY_ATTR_TOS:
 377                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
 378                                        nla_get_u8(a), is_mask);
 379                        break;
 380                case OVS_TUNNEL_KEY_ATTR_TTL:
 381                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
 382                                        nla_get_u8(a), is_mask);
 383                        ttl = true;
 384                        break;
 385                case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
 386                        tun_flags |= TUNNEL_DONT_FRAGMENT;
 387                        break;
 388                case OVS_TUNNEL_KEY_ATTR_CSUM:
 389                        tun_flags |= TUNNEL_CSUM;
 390                        break;
 391                default:
 392                        return -EINVAL;
 393                }
 394        }
 395
 396        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 397
 398        if (rem > 0) {
 399                OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
 400                return -EINVAL;
 401        }
 402
 403        if (!is_mask) {
 404                if (!match->key->tun_key.ipv4_dst) {
 405                        OVS_NLERR("IPv4 tunnel destination address is zero.\n");
 406                        return -EINVAL;
 407                }
 408
 409                if (!ttl) {
 410                        OVS_NLERR("IPv4 tunnel TTL not specified.\n");
 411                        return -EINVAL;
 412                }
 413        }
 414
 415        return 0;
 416}
 417
 418static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 419                              const struct ovs_key_ipv4_tunnel *tun_key,
 420                              const struct ovs_key_ipv4_tunnel *output)
 421{
 422        struct nlattr *nla;
 423
 424        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
 425        if (!nla)
 426                return -EMSGSIZE;
 427
 428        if (output->tun_flags & TUNNEL_KEY &&
 429            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
 430                return -EMSGSIZE;
 431        if (output->ipv4_src &&
 432                nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
 433                return -EMSGSIZE;
 434        if (output->ipv4_dst &&
 435                nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
 436                return -EMSGSIZE;
 437        if (output->ipv4_tos &&
 438                nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
 439                return -EMSGSIZE;
 440        if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
 441                return -EMSGSIZE;
 442        if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
 443                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 444                return -EMSGSIZE;
 445        if ((output->tun_flags & TUNNEL_CSUM) &&
 446                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 447                return -EMSGSIZE;
 448
 449        nla_nest_end(skb, nla);
 450        return 0;
 451}
 452
 453
 454static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 455                                 const struct nlattr **a, bool is_mask)
 456{
 457        if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
 458                SW_FLOW_KEY_PUT(match, phy.priority,
 459                          nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
 460                *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
 461        }
 462
 463        if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
 464                u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
 465
 466                if (is_mask)
 467                        in_port = 0xffffffff; /* Always exact match in_port. */
 468                else if (in_port >= DP_MAX_PORTS)
 469                        return -EINVAL;
 470
 471                SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
 472                *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
 473        } else if (!is_mask) {
 474                SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
 475        }
 476
 477        if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
 478                uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
 479
 480                SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
 481                *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
 482        }
 483        if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
 484                if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
 485                                         is_mask))
 486                        return -EINVAL;
 487                *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
 488        }
 489        return 0;
 490}
 491
 492static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 493                                const struct nlattr **a, bool is_mask)
 494{
 495        int err;
 496        u64 orig_attrs = attrs;
 497
 498        err = metadata_from_nlattrs(match, &attrs, a, is_mask);
 499        if (err)
 500                return err;
 501
 502        if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
 503                const struct ovs_key_ethernet *eth_key;
 504
 505                eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
 506                SW_FLOW_KEY_MEMCPY(match, eth.src,
 507                                eth_key->eth_src, ETH_ALEN, is_mask);
 508                SW_FLOW_KEY_MEMCPY(match, eth.dst,
 509                                eth_key->eth_dst, ETH_ALEN, is_mask);
 510                attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
 511        }
 512
 513        if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
 514                __be16 tci;
 515
 516                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 517                if (!(tci & htons(VLAN_TAG_PRESENT))) {
 518                        if (is_mask)
 519                                OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
 520                        else
 521                                OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
 522
 523                        return -EINVAL;
 524                }
 525
 526                SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
 527                attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
 528        } else if (!is_mask)
 529                SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
 530
 531        if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
 532                __be16 eth_type;
 533
 534                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
 535                if (is_mask) {
 536                        /* Always exact match EtherType. */
 537                        eth_type = htons(0xffff);
 538                } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
 539                        OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
 540                                        ntohs(eth_type), ETH_P_802_3_MIN);
 541                        return -EINVAL;
 542                }
 543
 544                SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
 545                attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
 546        } else if (!is_mask) {
 547                SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
 548        }
 549
 550        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
 551                const struct ovs_key_ipv4 *ipv4_key;
 552
 553                ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
 554                if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
 555                        OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
 556                                ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
 557                        return -EINVAL;
 558                }
 559                SW_FLOW_KEY_PUT(match, ip.proto,
 560                                ipv4_key->ipv4_proto, is_mask);
 561                SW_FLOW_KEY_PUT(match, ip.tos,
 562                                ipv4_key->ipv4_tos, is_mask);
 563                SW_FLOW_KEY_PUT(match, ip.ttl,
 564                                ipv4_key->ipv4_ttl, is_mask);
 565                SW_FLOW_KEY_PUT(match, ip.frag,
 566                                ipv4_key->ipv4_frag, is_mask);
 567                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
 568                                ipv4_key->ipv4_src, is_mask);
 569                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
 570                                ipv4_key->ipv4_dst, is_mask);
 571                attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
 572        }
 573
 574        if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
 575                const struct ovs_key_ipv6 *ipv6_key;
 576
 577                ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
 578                if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
 579                        OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
 580                                ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
 581                        return -EINVAL;
 582                }
 583                SW_FLOW_KEY_PUT(match, ipv6.label,
 584                                ipv6_key->ipv6_label, is_mask);
 585                SW_FLOW_KEY_PUT(match, ip.proto,
 586                                ipv6_key->ipv6_proto, is_mask);
 587                SW_FLOW_KEY_PUT(match, ip.tos,
 588                                ipv6_key->ipv6_tclass, is_mask);
 589                SW_FLOW_KEY_PUT(match, ip.ttl,
 590                                ipv6_key->ipv6_hlimit, is_mask);
 591                SW_FLOW_KEY_PUT(match, ip.frag,
 592                                ipv6_key->ipv6_frag, is_mask);
 593                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
 594                                ipv6_key->ipv6_src,
 595                                sizeof(match->key->ipv6.addr.src),
 596                                is_mask);
 597                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
 598                                ipv6_key->ipv6_dst,
 599                                sizeof(match->key->ipv6.addr.dst),
 600                                is_mask);
 601
 602                attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
 603        }
 604
 605        if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
 606                const struct ovs_key_arp *arp_key;
 607
 608                arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
 609                if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
 610                        OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
 611                                  arp_key->arp_op);
 612                        return -EINVAL;
 613                }
 614
 615                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
 616                                arp_key->arp_sip, is_mask);
 617                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
 618                        arp_key->arp_tip, is_mask);
 619                SW_FLOW_KEY_PUT(match, ip.proto,
 620                                ntohs(arp_key->arp_op), is_mask);
 621                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
 622                                arp_key->arp_sha, ETH_ALEN, is_mask);
 623                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
 624                                arp_key->arp_tha, ETH_ALEN, is_mask);
 625
 626                attrs &= ~(1 << OVS_KEY_ATTR_ARP);
 627        }
 628
 629        if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
 630                const struct ovs_key_tcp *tcp_key;
 631
 632                tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
 633                SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
 634                SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
 635                attrs &= ~(1 << OVS_KEY_ATTR_TCP);
 636        }
 637
 638        if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
 639                if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
 640                        SW_FLOW_KEY_PUT(match, tp.flags,
 641                                        nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
 642                                        is_mask);
 643                } else {
 644                        SW_FLOW_KEY_PUT(match, tp.flags,
 645                                        nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
 646                                        is_mask);
 647                }
 648                attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
 649        }
 650
 651        if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
 652                const struct ovs_key_udp *udp_key;
 653
 654                udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
 655                SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
 656                SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
 657                attrs &= ~(1 << OVS_KEY_ATTR_UDP);
 658        }
 659
 660        if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
 661                const struct ovs_key_sctp *sctp_key;
 662
 663                sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
 664                SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
 665                SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
 666                attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
 667        }
 668
 669        if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
 670                const struct ovs_key_icmp *icmp_key;
 671
 672                icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
 673                SW_FLOW_KEY_PUT(match, tp.src,
 674                                htons(icmp_key->icmp_type), is_mask);
 675                SW_FLOW_KEY_PUT(match, tp.dst,
 676                                htons(icmp_key->icmp_code), is_mask);
 677                attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
 678        }
 679
 680        if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
 681                const struct ovs_key_icmpv6 *icmpv6_key;
 682
 683                icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
 684                SW_FLOW_KEY_PUT(match, tp.src,
 685                                htons(icmpv6_key->icmpv6_type), is_mask);
 686                SW_FLOW_KEY_PUT(match, tp.dst,
 687                                htons(icmpv6_key->icmpv6_code), is_mask);
 688                attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
 689        }
 690
 691        if (attrs & (1 << OVS_KEY_ATTR_ND)) {
 692                const struct ovs_key_nd *nd_key;
 693
 694                nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
 695                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
 696                        nd_key->nd_target,
 697                        sizeof(match->key->ipv6.nd.target),
 698                        is_mask);
 699                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
 700                        nd_key->nd_sll, ETH_ALEN, is_mask);
 701                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
 702                                nd_key->nd_tll, ETH_ALEN, is_mask);
 703                attrs &= ~(1 << OVS_KEY_ATTR_ND);
 704        }
 705
 706        if (attrs != 0)
 707                return -EINVAL;
 708
 709        return 0;
 710}
 711
 712static void sw_flow_mask_set(struct sw_flow_mask *mask,
 713                             struct sw_flow_key_range *range, u8 val)
 714{
 715        u8 *m = (u8 *)&mask->key + range->start;
 716
 717        mask->range = *range;
 718        memset(m, val, range_n_bytes(range));
 719}
 720
 721/**
 722 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 723 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 724 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 725 * does not include any don't care bit.
 726 * @match: receives the extracted flow match information.
 727 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 728 * sequence. The fields should of the packet that triggered the creation
 729 * of this flow.
 730 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 731 * attribute specifies the mask field of the wildcarded flow.
 732 */
 733int ovs_nla_get_match(struct sw_flow_match *match,
 734                      const struct nlattr *key,
 735                      const struct nlattr *mask)
 736{
 737        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
 738        const struct nlattr *encap;
 739        u64 key_attrs = 0;
 740        u64 mask_attrs = 0;
 741        bool encap_valid = false;
 742        int err;
 743
 744        err = parse_flow_nlattrs(key, a, &key_attrs);
 745        if (err)
 746                return err;
 747
 748        if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
 749            (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
 750            (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
 751                __be16 tci;
 752
 753                if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
 754                      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
 755                        OVS_NLERR("Invalid Vlan frame.\n");
 756                        return -EINVAL;
 757                }
 758
 759                key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
 760                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 761                encap = a[OVS_KEY_ATTR_ENCAP];
 762                key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
 763                encap_valid = true;
 764
 765                if (tci & htons(VLAN_TAG_PRESENT)) {
 766                        err = parse_flow_nlattrs(encap, a, &key_attrs);
 767                        if (err)
 768                                return err;
 769                } else if (!tci) {
 770                        /* Corner case for truncated 802.1Q header. */
 771                        if (nla_len(encap)) {
 772                                OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
 773                                return -EINVAL;
 774                        }
 775                } else {
 776                        OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
 777                        return  -EINVAL;
 778                }
 779        }
 780
 781        err = ovs_key_from_nlattrs(match, key_attrs, a, false);
 782        if (err)
 783                return err;
 784
 785        if (mask) {
 786                err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
 787                if (err)
 788                        return err;
 789
 790                if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP)  {
 791                        __be16 eth_type = 0;
 792                        __be16 tci = 0;
 793
 794                        if (!encap_valid) {
 795                                OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
 796                                return  -EINVAL;
 797                        }
 798
 799                        mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
 800                        if (a[OVS_KEY_ATTR_ETHERTYPE])
 801                                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
 802
 803                        if (eth_type == htons(0xffff)) {
 804                                mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
 805                                encap = a[OVS_KEY_ATTR_ENCAP];
 806                                err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
 807                        } else {
 808                                OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
 809                                                ntohs(eth_type));
 810                                return -EINVAL;
 811                        }
 812
 813                        if (a[OVS_KEY_ATTR_VLAN])
 814                                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 815
 816                        if (!(tci & htons(VLAN_TAG_PRESENT))) {
 817                                OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
 818                                return -EINVAL;
 819                        }
 820                }
 821
 822                err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
 823                if (err)
 824                        return err;
 825        } else {
 826                /* Populate exact match flow's key mask. */
 827                if (match->mask)
 828                        sw_flow_mask_set(match->mask, &match->range, 0xff);
 829        }
 830
 831        if (!match_validate(match, key_attrs, mask_attrs))
 832                return -EINVAL;
 833
 834        return 0;
 835}
 836
 837/**
 838 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 839 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
 840 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 841 * sequence.
 842 *
 843 * This parses a series of Netlink attributes that form a flow key, which must
 844 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 845 * get the metadata, that is, the parts of the flow key that cannot be
 846 * extracted from the packet itself.
 847 */
 848
 849int ovs_nla_get_flow_metadata(struct sw_flow *flow,
 850                              const struct nlattr *attr)
 851{
 852        struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
 853        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
 854        u64 attrs = 0;
 855        int err;
 856        struct sw_flow_match match;
 857
 858        flow->key.phy.in_port = DP_MAX_PORTS;
 859        flow->key.phy.priority = 0;
 860        flow->key.phy.skb_mark = 0;
 861        memset(tun_key, 0, sizeof(flow->key.tun_key));
 862
 863        err = parse_flow_nlattrs(attr, a, &attrs);
 864        if (err)
 865                return -EINVAL;
 866
 867        memset(&match, 0, sizeof(match));
 868        match.key = &flow->key;
 869
 870        err = metadata_from_nlattrs(&match, &attrs, a, false);
 871        if (err)
 872                return err;
 873
 874        return 0;
 875}
 876
 877int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 878                     const struct sw_flow_key *output, struct sk_buff *skb)
 879{
 880        struct ovs_key_ethernet *eth_key;
 881        struct nlattr *nla, *encap;
 882        bool is_mask = (swkey != output);
 883
 884        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
 885                goto nla_put_failure;
 886
 887        if ((swkey->tun_key.ipv4_dst || is_mask) &&
 888            ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
 889                goto nla_put_failure;
 890
 891        if (swkey->phy.in_port == DP_MAX_PORTS) {
 892                if (is_mask && (output->phy.in_port == 0xffff))
 893                        if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
 894                                goto nla_put_failure;
 895        } else {
 896                u16 upper_u16;
 897                upper_u16 = !is_mask ? 0 : 0xffff;
 898
 899                if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
 900                                (upper_u16 << 16) | output->phy.in_port))
 901                        goto nla_put_failure;
 902        }
 903
 904        if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
 905                goto nla_put_failure;
 906
 907        nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 908        if (!nla)
 909                goto nla_put_failure;
 910
 911        eth_key = nla_data(nla);
 912        ether_addr_copy(eth_key->eth_src, output->eth.src);
 913        ether_addr_copy(eth_key->eth_dst, output->eth.dst);
 914
 915        if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
 916                __be16 eth_type;
 917                eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
 918                if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
 919                    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
 920                        goto nla_put_failure;
 921                encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 922                if (!swkey->eth.tci)
 923                        goto unencap;
 924        } else
 925                encap = NULL;
 926
 927        if (swkey->eth.type == htons(ETH_P_802_2)) {
 928                /*
 929                 * Ethertype 802.2 is represented in the netlink with omitted
 930                 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
 931                 * 0xffff in the mask attribute.  Ethertype can also
 932                 * be wildcarded.
 933                 */
 934                if (is_mask && output->eth.type)
 935                        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
 936                                                output->eth.type))
 937                                goto nla_put_failure;
 938                goto unencap;
 939        }
 940
 941        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
 942                goto nla_put_failure;
 943
 944        if (swkey->eth.type == htons(ETH_P_IP)) {
 945                struct ovs_key_ipv4 *ipv4_key;
 946
 947                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
 948                if (!nla)
 949                        goto nla_put_failure;
 950                ipv4_key = nla_data(nla);
 951                ipv4_key->ipv4_src = output->ipv4.addr.src;
 952                ipv4_key->ipv4_dst = output->ipv4.addr.dst;
 953                ipv4_key->ipv4_proto = output->ip.proto;
 954                ipv4_key->ipv4_tos = output->ip.tos;
 955                ipv4_key->ipv4_ttl = output->ip.ttl;
 956                ipv4_key->ipv4_frag = output->ip.frag;
 957        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
 958                struct ovs_key_ipv6 *ipv6_key;
 959
 960                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
 961                if (!nla)
 962                        goto nla_put_failure;
 963                ipv6_key = nla_data(nla);
 964                memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
 965                                sizeof(ipv6_key->ipv6_src));
 966                memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
 967                                sizeof(ipv6_key->ipv6_dst));
 968                ipv6_key->ipv6_label = output->ipv6.label;
 969                ipv6_key->ipv6_proto = output->ip.proto;
 970                ipv6_key->ipv6_tclass = output->ip.tos;
 971                ipv6_key->ipv6_hlimit = output->ip.ttl;
 972                ipv6_key->ipv6_frag = output->ip.frag;
 973        } else if (swkey->eth.type == htons(ETH_P_ARP) ||
 974                   swkey->eth.type == htons(ETH_P_RARP)) {
 975                struct ovs_key_arp *arp_key;
 976
 977                nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
 978                if (!nla)
 979                        goto nla_put_failure;
 980                arp_key = nla_data(nla);
 981                memset(arp_key, 0, sizeof(struct ovs_key_arp));
 982                arp_key->arp_sip = output->ipv4.addr.src;
 983                arp_key->arp_tip = output->ipv4.addr.dst;
 984                arp_key->arp_op = htons(output->ip.proto);
 985                ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
 986                ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
 987        }
 988
 989        if ((swkey->eth.type == htons(ETH_P_IP) ||
 990             swkey->eth.type == htons(ETH_P_IPV6)) &&
 991             swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
 992
 993                if (swkey->ip.proto == IPPROTO_TCP) {
 994                        struct ovs_key_tcp *tcp_key;
 995
 996                        nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
 997                        if (!nla)
 998                                goto nla_put_failure;
 999                        tcp_key = nla_data(nla);
1000                        tcp_key->tcp_src = output->tp.src;
1001                        tcp_key->tcp_dst = output->tp.dst;
1002                        if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
1003                                         output->tp.flags))
1004                                goto nla_put_failure;
1005                } else if (swkey->ip.proto == IPPROTO_UDP) {
1006                        struct ovs_key_udp *udp_key;
1007
1008                        nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1009                        if (!nla)
1010                                goto nla_put_failure;
1011                        udp_key = nla_data(nla);
1012                        udp_key->udp_src = output->tp.src;
1013                        udp_key->udp_dst = output->tp.dst;
1014                } else if (swkey->ip.proto == IPPROTO_SCTP) {
1015                        struct ovs_key_sctp *sctp_key;
1016
1017                        nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1018                        if (!nla)
1019                                goto nla_put_failure;
1020                        sctp_key = nla_data(nla);
1021                        sctp_key->sctp_src = output->tp.src;
1022                        sctp_key->sctp_dst = output->tp.dst;
1023                } else if (swkey->eth.type == htons(ETH_P_IP) &&
1024                           swkey->ip.proto == IPPROTO_ICMP) {
1025                        struct ovs_key_icmp *icmp_key;
1026
1027                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1028                        if (!nla)
1029                                goto nla_put_failure;
1030                        icmp_key = nla_data(nla);
1031                        icmp_key->icmp_type = ntohs(output->tp.src);
1032                        icmp_key->icmp_code = ntohs(output->tp.dst);
1033                } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1034                           swkey->ip.proto == IPPROTO_ICMPV6) {
1035                        struct ovs_key_icmpv6 *icmpv6_key;
1036
1037                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1038                                                sizeof(*icmpv6_key));
1039                        if (!nla)
1040                                goto nla_put_failure;
1041                        icmpv6_key = nla_data(nla);
1042                        icmpv6_key->icmpv6_type = ntohs(output->tp.src);
1043                        icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
1044
1045                        if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1046                            icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1047                                struct ovs_key_nd *nd_key;
1048
1049                                nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1050                                if (!nla)
1051                                        goto nla_put_failure;
1052                                nd_key = nla_data(nla);
1053                                memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1054                                                        sizeof(nd_key->nd_target));
1055                                ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
1056                                ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
1057                        }
1058                }
1059        }
1060
1061unencap:
1062        if (encap)
1063                nla_nest_end(skb, encap);
1064
1065        return 0;
1066
1067nla_put_failure:
1068        return -EMSGSIZE;
1069}
1070
1071#define MAX_ACTIONS_BUFSIZE     (32 * 1024)
1072
1073struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
1074{
1075        struct sw_flow_actions *sfa;
1076
1077        if (size > MAX_ACTIONS_BUFSIZE)
1078                return ERR_PTR(-EINVAL);
1079
1080        sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1081        if (!sfa)
1082                return ERR_PTR(-ENOMEM);
1083
1084        sfa->actions_len = 0;
1085        return sfa;
1086}
1087
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible.
 *
 * The free is deferred through kfree_rcu(), using the 'rcu' member of
 * struct sw_flow_actions. */
void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
{
        kfree_rcu(sf_acts, rcu);
}
1094
1095static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1096                                       int attr_len)
1097{
1098
1099        struct sw_flow_actions *acts;
1100        int new_acts_size;
1101        int req_size = NLA_ALIGN(attr_len);
1102        int next_offset = offsetof(struct sw_flow_actions, actions) +
1103                                        (*sfa)->actions_len;
1104
1105        if (req_size <= (ksize(*sfa) - next_offset))
1106                goto out;
1107
1108        new_acts_size = ksize(*sfa) * 2;
1109
1110        if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
1111                if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
1112                        return ERR_PTR(-EMSGSIZE);
1113                new_acts_size = MAX_ACTIONS_BUFSIZE;
1114        }
1115
1116        acts = ovs_nla_alloc_flow_actions(new_acts_size);
1117        if (IS_ERR(acts))
1118                return (void *)acts;
1119
1120        memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
1121        acts->actions_len = (*sfa)->actions_len;
1122        kfree(*sfa);
1123        *sfa = acts;
1124
1125out:
1126        (*sfa)->actions_len += req_size;
1127        return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
1128}
1129
1130static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
1131{
1132        struct nlattr *a;
1133
1134        a = reserve_sfa_size(sfa, nla_attr_size(len));
1135        if (IS_ERR(a))
1136                return PTR_ERR(a);
1137
1138        a->nla_type = attrtype;
1139        a->nla_len = nla_attr_size(len);
1140
1141        if (data)
1142                memcpy(nla_data(a), data, len);
1143        memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
1144
1145        return 0;
1146}
1147
1148static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1149                                          int attrtype)
1150{
1151        int used = (*sfa)->actions_len;
1152        int err;
1153
1154        err = add_action(sfa, attrtype, NULL, 0);
1155        if (err)
1156                return err;
1157
1158        return used;
1159}
1160
1161static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1162                                         int st_offset)
1163{
1164        struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
1165                                                               st_offset);
1166
1167        a->nla_len = sfa->actions_len - st_offset;
1168}
1169
1170static int validate_and_copy_sample(const struct nlattr *attr,
1171                                    const struct sw_flow_key *key, int depth,
1172                                    struct sw_flow_actions **sfa)
1173{
1174        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1175        const struct nlattr *probability, *actions;
1176        const struct nlattr *a;
1177        int rem, start, err, st_acts;
1178
1179        memset(attrs, 0, sizeof(attrs));
1180        nla_for_each_nested(a, attr, rem) {
1181                int type = nla_type(a);
1182                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
1183                        return -EINVAL;
1184                attrs[type] = a;
1185        }
1186        if (rem)
1187                return -EINVAL;
1188
1189        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
1190        if (!probability || nla_len(probability) != sizeof(u32))
1191                return -EINVAL;
1192
1193        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
1194        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
1195                return -EINVAL;
1196
1197        /* validation done, copy sample action. */
1198        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
1199        if (start < 0)
1200                return start;
1201        err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1202                         nla_data(probability), sizeof(u32));
1203        if (err)
1204                return err;
1205        st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
1206        if (st_acts < 0)
1207                return st_acts;
1208
1209        err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
1210        if (err)
1211                return err;
1212
1213        add_nested_action_end(*sfa, st_acts);
1214        add_nested_action_end(*sfa, start);
1215
1216        return 0;
1217}
1218
1219static int validate_tp_port(const struct sw_flow_key *flow_key)
1220{
1221        if ((flow_key->eth.type == htons(ETH_P_IP) ||
1222             flow_key->eth.type == htons(ETH_P_IPV6)) &&
1223            (flow_key->tp.src || flow_key->tp.dst))
1224                return 0;
1225
1226        return -EINVAL;
1227}
1228
1229void ovs_match_init(struct sw_flow_match *match,
1230                    struct sw_flow_key *key,
1231                    struct sw_flow_mask *mask)
1232{
1233        memset(match, 0, sizeof(*match));
1234        match->key = key;
1235        match->mask = mask;
1236
1237        memset(key, 0, sizeof(*key));
1238
1239        if (mask) {
1240                memset(&mask->key, 0, sizeof(mask->key));
1241                mask->range.start = mask->range.end = 0;
1242        }
1243}
1244
1245static int validate_and_copy_set_tun(const struct nlattr *attr,
1246                                     struct sw_flow_actions **sfa)
1247{
1248        struct sw_flow_match match;
1249        struct sw_flow_key key;
1250        int err, start;
1251
1252        ovs_match_init(&match, &key, NULL);
1253        err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
1254        if (err)
1255                return err;
1256
1257        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
1258        if (start < 0)
1259                return start;
1260
1261        err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
1262                        sizeof(match.key->tun_key));
1263        add_nested_action_end(*sfa, start);
1264
1265        return err;
1266}
1267
1268static int validate_set(const struct nlattr *a,
1269                        const struct sw_flow_key *flow_key,
1270                        struct sw_flow_actions **sfa,
1271                        bool *set_tun)
1272{
1273        const struct nlattr *ovs_key = nla_data(a);
1274        int key_type = nla_type(ovs_key);
1275
1276        /* There can be only one key in a action */
1277        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
1278                return -EINVAL;
1279
1280        if (key_type > OVS_KEY_ATTR_MAX ||
1281            (ovs_key_lens[key_type] != nla_len(ovs_key) &&
1282             ovs_key_lens[key_type] != -1))
1283                return -EINVAL;
1284
1285        switch (key_type) {
1286        const struct ovs_key_ipv4 *ipv4_key;
1287        const struct ovs_key_ipv6 *ipv6_key;
1288        int err;
1289
1290        case OVS_KEY_ATTR_PRIORITY:
1291        case OVS_KEY_ATTR_SKB_MARK:
1292        case OVS_KEY_ATTR_ETHERNET:
1293                break;
1294
1295        case OVS_KEY_ATTR_TUNNEL:
1296                *set_tun = true;
1297                err = validate_and_copy_set_tun(a, sfa);
1298                if (err)
1299                        return err;
1300                break;
1301
1302        case OVS_KEY_ATTR_IPV4:
1303                if (flow_key->eth.type != htons(ETH_P_IP))
1304                        return -EINVAL;
1305
1306                if (!flow_key->ip.proto)
1307                        return -EINVAL;
1308
1309                ipv4_key = nla_data(ovs_key);
1310                if (ipv4_key->ipv4_proto != flow_key->ip.proto)
1311                        return -EINVAL;
1312
1313                if (ipv4_key->ipv4_frag != flow_key->ip.frag)
1314                        return -EINVAL;
1315
1316                break;
1317
1318        case OVS_KEY_ATTR_IPV6:
1319                if (flow_key->eth.type != htons(ETH_P_IPV6))
1320                        return -EINVAL;
1321
1322                if (!flow_key->ip.proto)
1323                        return -EINVAL;
1324
1325                ipv6_key = nla_data(ovs_key);
1326                if (ipv6_key->ipv6_proto != flow_key->ip.proto)
1327                        return -EINVAL;
1328
1329                if (ipv6_key->ipv6_frag != flow_key->ip.frag)
1330                        return -EINVAL;
1331
1332                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
1333                        return -EINVAL;
1334
1335                break;
1336
1337        case OVS_KEY_ATTR_TCP:
1338                if (flow_key->ip.proto != IPPROTO_TCP)
1339                        return -EINVAL;
1340
1341                return validate_tp_port(flow_key);
1342
1343        case OVS_KEY_ATTR_UDP:
1344                if (flow_key->ip.proto != IPPROTO_UDP)
1345                        return -EINVAL;
1346
1347                return validate_tp_port(flow_key);
1348
1349        case OVS_KEY_ATTR_SCTP:
1350                if (flow_key->ip.proto != IPPROTO_SCTP)
1351                        return -EINVAL;
1352
1353                return validate_tp_port(flow_key);
1354
1355        default:
1356                return -EINVAL;
1357        }
1358
1359        return 0;
1360}
1361
1362static int validate_userspace(const struct nlattr *attr)
1363{
1364        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1365                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1366                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1367        };
1368        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1369        int error;
1370
1371        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
1372                                 attr, userspace_policy);
1373        if (error)
1374                return error;
1375
1376        if (!a[OVS_USERSPACE_ATTR_PID] ||
1377            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
1378                return -EINVAL;
1379
1380        return 0;
1381}
1382
1383static int copy_action(const struct nlattr *from,
1384                       struct sw_flow_actions **sfa)
1385{
1386        int totlen = NLA_ALIGN(from->nla_len);
1387        struct nlattr *to;
1388
1389        to = reserve_sfa_size(sfa, from->nla_len);
1390        if (IS_ERR(to))
1391                return PTR_ERR(to);
1392
1393        memcpy(to, from, totlen);
1394        return 0;
1395}
1396
1397int ovs_nla_copy_actions(const struct nlattr *attr,
1398                         const struct sw_flow_key *key,
1399                         int depth,
1400                         struct sw_flow_actions **sfa)
1401{
1402        const struct nlattr *a;
1403        int rem, err;
1404
1405        if (depth >= SAMPLE_ACTION_DEPTH)
1406                return -EOVERFLOW;
1407
1408        nla_for_each_nested(a, attr, rem) {
1409                /* Expected argument lengths, (u32)-1 for variable length. */
1410                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
1411                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1412                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1413                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1414                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
1415                        [OVS_ACTION_ATTR_SET] = (u32)-1,
1416                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
1417                };
1418                const struct ovs_action_push_vlan *vlan;
1419                int type = nla_type(a);
1420                bool skip_copy;
1421
1422                if (type > OVS_ACTION_ATTR_MAX ||
1423                    (action_lens[type] != nla_len(a) &&
1424                     action_lens[type] != (u32)-1))
1425                        return -EINVAL;
1426
1427                skip_copy = false;
1428                switch (type) {
1429                case OVS_ACTION_ATTR_UNSPEC:
1430                        return -EINVAL;
1431
1432                case OVS_ACTION_ATTR_USERSPACE:
1433                        err = validate_userspace(a);
1434                        if (err)
1435                                return err;
1436                        break;
1437
1438                case OVS_ACTION_ATTR_OUTPUT:
1439                        if (nla_get_u32(a) >= DP_MAX_PORTS)
1440                                return -EINVAL;
1441                        break;
1442
1443
1444                case OVS_ACTION_ATTR_POP_VLAN:
1445                        break;
1446
1447                case OVS_ACTION_ATTR_PUSH_VLAN:
1448                        vlan = nla_data(a);
1449                        if (vlan->vlan_tpid != htons(ETH_P_8021Q))
1450                                return -EINVAL;
1451                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1452                                return -EINVAL;
1453                        break;
1454
1455                case OVS_ACTION_ATTR_SET:
1456                        err = validate_set(a, key, sfa, &skip_copy);
1457                        if (err)
1458                                return err;
1459                        break;
1460
1461                case OVS_ACTION_ATTR_SAMPLE:
1462                        err = validate_and_copy_sample(a, key, depth, sfa);
1463                        if (err)
1464                                return err;
1465                        skip_copy = true;
1466                        break;
1467
1468                default:
1469                        return -EINVAL;
1470                }
1471                if (!skip_copy) {
1472                        err = copy_action(a, sfa);
1473                        if (err)
1474                                return err;
1475                }
1476        }
1477
1478        if (rem > 0)
1479                return -EINVAL;
1480
1481        return 0;
1482}
1483
1484static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1485{
1486        const struct nlattr *a;
1487        struct nlattr *start;
1488        int err = 0, rem;
1489
1490        start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1491        if (!start)
1492                return -EMSGSIZE;
1493
1494        nla_for_each_nested(a, attr, rem) {
1495                int type = nla_type(a);
1496                struct nlattr *st_sample;
1497
1498                switch (type) {
1499                case OVS_SAMPLE_ATTR_PROBABILITY:
1500                        if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
1501                                    sizeof(u32), nla_data(a)))
1502                                return -EMSGSIZE;
1503                        break;
1504                case OVS_SAMPLE_ATTR_ACTIONS:
1505                        st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1506                        if (!st_sample)
1507                                return -EMSGSIZE;
1508                        err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
1509                        if (err)
1510                                return err;
1511                        nla_nest_end(skb, st_sample);
1512                        break;
1513                }
1514        }
1515
1516        nla_nest_end(skb, start);
1517        return err;
1518}
1519
1520static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1521{
1522        const struct nlattr *ovs_key = nla_data(a);
1523        int key_type = nla_type(ovs_key);
1524        struct nlattr *start;
1525        int err;
1526
1527        switch (key_type) {
1528        case OVS_KEY_ATTR_IPV4_TUNNEL:
1529                start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1530                if (!start)
1531                        return -EMSGSIZE;
1532
1533                err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
1534                                             nla_data(ovs_key));
1535                if (err)
1536                        return err;
1537                nla_nest_end(skb, start);
1538                break;
1539        default:
1540                if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1541                        return -EMSGSIZE;
1542                break;
1543        }
1544
1545        return 0;
1546}
1547
1548int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
1549{
1550        const struct nlattr *a;
1551        int rem, err;
1552
1553        nla_for_each_attr(a, attr, len, rem) {
1554                int type = nla_type(a);
1555
1556                switch (type) {
1557                case OVS_ACTION_ATTR_SET:
1558                        err = set_action_to_attr(a, skb);
1559                        if (err)
1560                                return err;
1561                        break;
1562
1563                case OVS_ACTION_ATTR_SAMPLE:
1564                        err = sample_action_to_attr(a, skb);
1565                        if (err)
1566                                return err;
1567                        break;
1568                default:
1569                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
1570                                return -EMSGSIZE;
1571                        break;
1572                }
1573        }
1574
1575        return 0;
1576}
1577