linux/net/openvswitch/flow_netlink.c
   1/*
   2 * Copyright (c) 2007-2014 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include "flow.h"
  22#include "datapath.h"
  23#include <linux/uaccess.h>
  24#include <linux/netdevice.h>
  25#include <linux/etherdevice.h>
  26#include <linux/if_ether.h>
  27#include <linux/if_vlan.h>
  28#include <net/llc_pdu.h>
  29#include <linux/kernel.h>
  30#include <linux/jhash.h>
  31#include <linux/jiffies.h>
  32#include <linux/llc.h>
  33#include <linux/module.h>
  34#include <linux/in.h>
  35#include <linux/rcupdate.h>
  36#include <linux/if_arp.h>
  37#include <linux/ip.h>
  38#include <linux/ipv6.h>
  39#include <linux/sctp.h>
  40#include <linux/tcp.h>
  41#include <linux/udp.h>
  42#include <linux/icmp.h>
  43#include <linux/icmpv6.h>
  44#include <linux/rculist.h>
  45#include <net/geneve.h>
  46#include <net/ip.h>
  47#include <net/ipv6.h>
  48#include <net/ndisc.h>
  49#include <net/mpls.h>
  50#include <net/vxlan.h>
  51
  52#include "flow_netlink.h"
  53
  54struct ovs_len_tbl {
  55        int len;
  56        const struct ovs_len_tbl *next;
  57};
  58
  59#define OVS_ATTR_NESTED -1
  60#define OVS_ATTR_VARIABLE -2
  61
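/* Widen the key (or mask) range of @match so that it covers the bytes
 * [offset, offset + size), rounded out to long-word boundaries so that
 * masked flow-table comparisons can operate on whole longs.
 */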
  62static void update_range(struct sw_flow_match *match,
  63                         size_t offset, size_t size, bool is_mask)
  64{
  65        struct sw_flow_key_range *range;
  66        size_t start = rounddown(offset, sizeof(long));
  67        size_t end = roundup(offset + size, sizeof(long));
  68
  69        if (!is_mask)
  70                range = &match->range;
  71        else
  72                range = &match->mask->range;
  73
  74        if (range->start == range->end) {
  75                range->start = start;
  76                range->end = end;
  77                return;
  78        }
  79
  80        if (range->start > start)
  81                range->start = start;
  82
  83        if (range->end < end)
  84                range->end = end;
  85}
  86
  87#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
  88        do { \
  89                update_range(match, offsetof(struct sw_flow_key, field),    \
  90                             sizeof((match)->key->field), is_mask);         \
  91                if (is_mask)                                                \
  92                        (match)->mask->key.field = value;                   \
  93                else                                                        \
  94                        (match)->key->field = value;                        \
  95        } while (0)
  96
  97#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
  98        do {                                                                \
  99                update_range(match, offset, len, is_mask);                  \
 100                if (is_mask)                                                \
 101                        memcpy((u8 *)&(match)->mask->key + offset, value_p, \
 102                               len);                                       \
 103                else                                                        \
 104                        memcpy((u8 *)(match)->key + offset, value_p, len);  \
 105        } while (0)
 106
 107#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
 108        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
 109                                  value_p, len, is_mask)
 110
 111#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
 112        do {                                                                \
 113                update_range(match, offsetof(struct sw_flow_key, field),    \
 114                             sizeof((match)->key->field), is_mask);         \
 115                if (is_mask)                                                \
 116                        memset((u8 *)&(match)->mask->key.field, value,      \
 117                               sizeof((match)->mask->key.field));           \
 118                else                                                        \
 119                        memset((u8 *)&(match)->key->field, value,           \
 120                               sizeof((match)->key->field));                \
 121        } while (0)
 122
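/* Check the parsed key and mask attribute sets against what the key's
 * EtherType and IP protocol make mandatory (key_expected) and what may
 * legitimately be masked (mask_allowed).  Returns false and logs an
 * error on any mismatch.
 */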
 123static bool match_validate(const struct sw_flow_match *match,
 124                           u64 key_attrs, u64 mask_attrs, bool log)
 125{
 126        u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
 127        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 128
  129        /* The following mask attributes are allowed only if they
  130         * pass the validation tests. */
 131        mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
 132                        | (1 << OVS_KEY_ATTR_IPV6)
 133                        | (1 << OVS_KEY_ATTR_TCP)
 134                        | (1 << OVS_KEY_ATTR_TCP_FLAGS)
 135                        | (1 << OVS_KEY_ATTR_UDP)
 136                        | (1 << OVS_KEY_ATTR_SCTP)
 137                        | (1 << OVS_KEY_ATTR_ICMP)
 138                        | (1 << OVS_KEY_ATTR_ICMPV6)
 139                        | (1 << OVS_KEY_ATTR_ARP)
 140                        | (1 << OVS_KEY_ATTR_ND)
 141                        | (1 << OVS_KEY_ATTR_MPLS));
 142
 143        /* Always allowed mask fields. */
 144        mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
 145                       | (1 << OVS_KEY_ATTR_IN_PORT)
 146                       | (1 << OVS_KEY_ATTR_ETHERTYPE));
 147
 148        /* Check key attributes. */
 149        if (match->key->eth.type == htons(ETH_P_ARP)
 150                        || match->key->eth.type == htons(ETH_P_RARP)) {
 151                key_expected |= 1 << OVS_KEY_ATTR_ARP;
 152                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 153                        mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
 154        }
 155
 156        if (eth_p_mpls(match->key->eth.type)) {
 157                key_expected |= 1 << OVS_KEY_ATTR_MPLS;
 158                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 159                        mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
 160        }
 161
 162        if (match->key->eth.type == htons(ETH_P_IP)) {
 163                key_expected |= 1 << OVS_KEY_ATTR_IPV4;
 164                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 165                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
 166
 167                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 168                        if (match->key->ip.proto == IPPROTO_UDP) {
 169                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 170                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 171                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 172                        }
 173
 174                        if (match->key->ip.proto == IPPROTO_SCTP) {
 175                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 176                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 177                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 178                        }
 179
 180                        if (match->key->ip.proto == IPPROTO_TCP) {
 181                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 182                                key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 183                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
 184                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 185                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 186                                }
 187                        }
 188
 189                        if (match->key->ip.proto == IPPROTO_ICMP) {
 190                                key_expected |= 1 << OVS_KEY_ATTR_ICMP;
 191                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 192                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
 193                        }
 194                }
 195        }
 196
 197        if (match->key->eth.type == htons(ETH_P_IPV6)) {
 198                key_expected |= 1 << OVS_KEY_ATTR_IPV6;
 199                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 200                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
 201
 202                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 203                        if (match->key->ip.proto == IPPROTO_UDP) {
 204                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 205                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 206                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 207                        }
 208
 209                        if (match->key->ip.proto == IPPROTO_SCTP) {
 210                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 211                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 212                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 213                        }
 214
 215                        if (match->key->ip.proto == IPPROTO_TCP) {
 216                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 217                                key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 218                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
 219                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 220                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 221                                }
 222                        }
 223
 224                        if (match->key->ip.proto == IPPROTO_ICMPV6) {
 225                                key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
 226                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 227                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
 228
 229                                if (match->key->tp.src ==
 230                                                htons(NDISC_NEIGHBOUR_SOLICITATION) ||
 231                                    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 232                                        key_expected |= 1 << OVS_KEY_ATTR_ND;
 233                                        if (match->mask && (match->mask->key.tp.src == htons(0xff)))
 234                                                mask_allowed |= 1 << OVS_KEY_ATTR_ND;
 235                                }
 236                        }
 237                }
 238        }
 239
 240        if ((key_attrs & key_expected) != key_expected) {
 241                /* Key attributes check failed. */
 242                OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
 243                          (unsigned long long)key_attrs,
 244                          (unsigned long long)key_expected);
 245                return false;
 246        }
 247
 248        if ((mask_attrs & mask_allowed) != mask_attrs) {
 249                /* Mask attributes check failed. */
 250                OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
 251                          (unsigned long long)mask_attrs,
 252                          (unsigned long long)mask_allowed);
 253                return false;
 254        }
 255
 256        return true;
 257}
 258
 259size_t ovs_tun_key_attr_size(void)
 260{
 261        /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
 262         * updating this function.
 263         */
 264        return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
 265                + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
 266                + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
 267                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
 268                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
 269                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
 270                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
 271                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
 272                + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
 273                /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
 274                 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
 275                 */
 276                + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
 277                + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 278}
 279
 280size_t ovs_key_attr_size(void)
 281{
 282        /* Whenever adding new OVS_KEY_ FIELDS, we should consider
 283         * updating this function.
 284         */
 285        BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
 286
 287        return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 288                + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
 289                  + ovs_tun_key_attr_size()
 290                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 291                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 292                + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
 293                + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
 294                + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
 295                + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
 296                + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
 297                + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
 298                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
 299                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 300                + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
 301                + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
 302                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 303                + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
 304                + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
 305                + nla_total_size(28); /* OVS_KEY_ATTR_ND */
 306}
 307
 308static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
 309        [OVS_VXLAN_EXT_GBP]         = { .len = sizeof(u32) },
 310};
 311
 312static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
 313        [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
 314        [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
 315        [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
 316        [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
 317        [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
 318        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
 319        [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
 320        [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
 321        [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
 322        [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
 323        [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
 324        [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
 325                                                .next = ovs_vxlan_ext_key_lens },
 326        [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 327        [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
 328};
 329
 330/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 331static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 332        [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
 333        [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
 334        [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
 335        [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
 336        [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
 337        [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
 338        [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
 339        [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
 340        [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
 341        [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
 342        [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
 343        [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
 344        [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
 345        [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
 346        [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
 347        [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
 348        [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
 349        [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
 350        [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
 351        [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
 352                                     .next = ovs_tunnel_key_lens, },
 353        [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
 354        [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
 355        [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
 356        [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
 357        [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
 358};
 359
 360static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
 361{
 362        return expected_len == attr_len ||
 363               expected_len == OVS_ATTR_NESTED ||
 364               expected_len == OVS_ATTR_VARIABLE;
 365}
 366
 367static bool is_all_zero(const u8 *fp, size_t size)
 368{
 369        int i;
 370
 371        if (!fp)
 372                return false;
 373
 374        for (i = 0; i < size; i++)
 375                if (fp[i])
 376                        return false;
 377
 378        return true;
 379}
 380
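/* Walk the nested OVS_KEY_ATTR_* attributes in @attr, checking each type
 * and payload length against ovs_key_lens[], storing the attribute in
 * a[type] and setting the matching bit in *attrsp.  With @nz set (mask
 * parsing), attributes whose payload is all zeroes are skipped.
 */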
 381static int __parse_flow_nlattrs(const struct nlattr *attr,
 382                                const struct nlattr *a[],
 383                                u64 *attrsp, bool log, bool nz)
 384{
 385        const struct nlattr *nla;
 386        u64 attrs;
 387        int rem;
 388
 389        attrs = *attrsp;
 390        nla_for_each_nested(nla, attr, rem) {
 391                u16 type = nla_type(nla);
 392                int expected_len;
 393
 394                if (type > OVS_KEY_ATTR_MAX) {
 395                        OVS_NLERR(log, "Key type %d is out of range max %d",
 396                                  type, OVS_KEY_ATTR_MAX);
 397                        return -EINVAL;
 398                }
 399
 400                if (attrs & (1 << type)) {
 401                        OVS_NLERR(log, "Duplicate key (type %d).", type);
 402                        return -EINVAL;
 403                }
 404
 405                expected_len = ovs_key_lens[type].len;
 406                if (!check_attr_len(nla_len(nla), expected_len)) {
 407                        OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
 408                                  type, nla_len(nla), expected_len);
 409                        return -EINVAL;
 410                }
 411
 412                if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
 413                        attrs |= 1 << type;
 414                        a[type] = nla;
 415                }
 416        }
 417        if (rem) {
 418                OVS_NLERR(log, "Message has %d unknown bytes.", rem);
 419                return -EINVAL;
 420        }
 421
 422        *attrsp = attrs;
 423        return 0;
 424}
 425
 426static int parse_flow_mask_nlattrs(const struct nlattr *attr,
 427                                   const struct nlattr *a[], u64 *attrsp,
 428                                   bool log)
 429{
 430        return __parse_flow_nlattrs(attr, a, attrsp, log, true);
 431}
 432
 433static int parse_flow_nlattrs(const struct nlattr *attr,
 434                              const struct nlattr *a[], u64 *attrsp,
 435                              bool log)
 436{
 437        return __parse_flow_nlattrs(attr, a, attrsp, log, false);
 438}
 439
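/* Copy Geneve TLV options from @a into the tun_opts area of the flow key
 * (or mask) and record their length; a mask must be exactly as long as
 * the options in the key.
 */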
 440static int genev_tun_opt_from_nlattr(const struct nlattr *a,
 441                                     struct sw_flow_match *match, bool is_mask,
 442                                     bool log)
 443{
 444        unsigned long opt_key_offset;
 445
 446        if (nla_len(a) > sizeof(match->key->tun_opts)) {
 447                OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
 448                          nla_len(a), sizeof(match->key->tun_opts));
 449                return -EINVAL;
 450        }
 451
 452        if (nla_len(a) % 4 != 0) {
 453                OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
 454                          nla_len(a));
 455                return -EINVAL;
 456        }
 457
 458        /* We need to record the length of the options passed
 459         * down, otherwise packets with the same format but
 460         * additional options will be silently matched.
 461         */
 462        if (!is_mask) {
 463                SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
 464                                false);
 465        } else {
 466                /* This is somewhat unusual because it looks at
 467                 * both the key and mask while parsing the
 468                 * attributes (and by extension assumes the key
 469                 * is parsed first). Normally, we would verify
 470                 * that each is the correct length and that the
 471                 * attributes line up in the validate function.
 472                 * However, that is difficult because this is
 473                 * variable length and we won't have the
 474                 * information later.
 475                 */
 476                if (match->key->tun_opts_len != nla_len(a)) {
 477                        OVS_NLERR(log, "Geneve option len %d != mask len %d",
 478                                  match->key->tun_opts_len, nla_len(a));
 479                        return -EINVAL;
 480                }
 481
 482                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 483        }
 484
 485        opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
 486        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
 487                                  nla_len(a), is_mask);
 488        return 0;
 489}
 490
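/* Parse the nested OVS_VXLAN_EXT_* attributes (currently only the GBP
 * extension) into a struct vxlan_metadata stored in the tun_opts area.
 */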
 491static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
 492                                     struct sw_flow_match *match, bool is_mask,
 493                                     bool log)
 494{
 495        struct nlattr *a;
 496        int rem;
 497        unsigned long opt_key_offset;
 498        struct vxlan_metadata opts;
 499
 500        BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
 501
 502        memset(&opts, 0, sizeof(opts));
 503        nla_for_each_nested(a, attr, rem) {
 504                int type = nla_type(a);
 505
 506                if (type > OVS_VXLAN_EXT_MAX) {
 507                        OVS_NLERR(log, "VXLAN extension %d out of range max %d",
 508                                  type, OVS_VXLAN_EXT_MAX);
 509                        return -EINVAL;
 510                }
 511
 512                if (!check_attr_len(nla_len(a),
 513                                    ovs_vxlan_ext_key_lens[type].len)) {
 514                        OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
 515                                  type, nla_len(a),
 516                                  ovs_vxlan_ext_key_lens[type].len);
 517                        return -EINVAL;
 518                }
 519
 520                switch (type) {
 521                case OVS_VXLAN_EXT_GBP:
 522                        opts.gbp = nla_get_u32(a);
 523                        break;
 524                default:
 525                        OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
 526                                  type);
 527                        return -EINVAL;
 528                }
 529        }
 530        if (rem) {
 531                OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
 532                          rem);
 533                return -EINVAL;
 534        }
 535
 536        if (!is_mask)
 537                SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
 538        else
 539                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 540
 541        opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
 542        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
 543                                  is_mask);
 544        return 0;
 545}
 546
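/* Parse the nested OVS_TUNNEL_KEY_ATTR_* attributes into the tunnel key
 * of @match.  Returns the attribute type of the tunnel options that were
 * supplied (Geneve or VXLAN), 0 if there were none, or a negative errno.
 */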
 547static int ip_tun_from_nlattr(const struct nlattr *attr,
 548                              struct sw_flow_match *match, bool is_mask,
 549                              bool log)
 550{
 551        bool ttl = false, ipv4 = false, ipv6 = false;
 552        __be16 tun_flags = 0;
 553        int opts_type = 0;
 554        struct nlattr *a;
 555        int rem;
 556
 557        nla_for_each_nested(a, attr, rem) {
 558                int type = nla_type(a);
 559                int err;
 560
 561                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
 562                        OVS_NLERR(log, "Tunnel attr %d out of range max %d",
 563                                  type, OVS_TUNNEL_KEY_ATTR_MAX);
 564                        return -EINVAL;
 565                }
 566
 567                if (!check_attr_len(nla_len(a),
 568                                    ovs_tunnel_key_lens[type].len)) {
 569                        OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
 570                                  type, nla_len(a), ovs_tunnel_key_lens[type].len);
 571                        return -EINVAL;
 572                }
 573
 574                switch (type) {
 575                case OVS_TUNNEL_KEY_ATTR_ID:
 576                        SW_FLOW_KEY_PUT(match, tun_key.tun_id,
 577                                        nla_get_be64(a), is_mask);
 578                        tun_flags |= TUNNEL_KEY;
 579                        break;
 580                case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
 581                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
 582                                        nla_get_in_addr(a), is_mask);
 583                        ipv4 = true;
 584                        break;
 585                case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
 586                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
 587                                        nla_get_in_addr(a), is_mask);
 588                        ipv4 = true;
 589                        break;
 590                case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
  591                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
 592                                        nla_get_in6_addr(a), is_mask);
 593                        ipv6 = true;
 594                        break;
 595                case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
 596                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
 597                                        nla_get_in6_addr(a), is_mask);
 598                        ipv6 = true;
 599                        break;
 600                case OVS_TUNNEL_KEY_ATTR_TOS:
 601                        SW_FLOW_KEY_PUT(match, tun_key.tos,
 602                                        nla_get_u8(a), is_mask);
 603                        break;
 604                case OVS_TUNNEL_KEY_ATTR_TTL:
 605                        SW_FLOW_KEY_PUT(match, tun_key.ttl,
 606                                        nla_get_u8(a), is_mask);
 607                        ttl = true;
 608                        break;
 609                case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
 610                        tun_flags |= TUNNEL_DONT_FRAGMENT;
 611                        break;
 612                case OVS_TUNNEL_KEY_ATTR_CSUM:
 613                        tun_flags |= TUNNEL_CSUM;
 614                        break;
 615                case OVS_TUNNEL_KEY_ATTR_TP_SRC:
 616                        SW_FLOW_KEY_PUT(match, tun_key.tp_src,
 617                                        nla_get_be16(a), is_mask);
 618                        break;
 619                case OVS_TUNNEL_KEY_ATTR_TP_DST:
 620                        SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
 621                                        nla_get_be16(a), is_mask);
 622                        break;
 623                case OVS_TUNNEL_KEY_ATTR_OAM:
 624                        tun_flags |= TUNNEL_OAM;
 625                        break;
 626                case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
 627                        if (opts_type) {
 628                                OVS_NLERR(log, "Multiple metadata blocks provided");
 629                                return -EINVAL;
 630                        }
 631
 632                        err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
 633                        if (err)
 634                                return err;
 635
 636                        tun_flags |= TUNNEL_GENEVE_OPT;
 637                        opts_type = type;
 638                        break;
 639                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
 640                        if (opts_type) {
 641                                OVS_NLERR(log, "Multiple metadata blocks provided");
 642                                return -EINVAL;
 643                        }
 644
 645                        err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
 646                        if (err)
 647                                return err;
 648
 649                        tun_flags |= TUNNEL_VXLAN_OPT;
 650                        opts_type = type;
 651                        break;
 652                default:
 653                        OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 654                                  type);
 655                        return -EINVAL;
 656                }
 657        }
 658
 659        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 660        if (is_mask)
 661                SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
 662        else
 663                SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
 664                                false);
 665
 666        if (rem > 0) {
 667                OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
 668                          rem);
 669                return -EINVAL;
 670        }
 671
 672        if (ipv4 && ipv6) {
 673                OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
 674                return -EINVAL;
 675        }
 676
 677        if (!is_mask) {
 678                if (!ipv4 && !ipv6) {
 679                        OVS_NLERR(log, "IP tunnel dst address not specified");
 680                        return -EINVAL;
 681                }
 682                if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
 683                        OVS_NLERR(log, "IPv4 tunnel dst address is zero");
 684                        return -EINVAL;
 685                }
 686                if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
 687                        OVS_NLERR(log, "IPv6 tunnel dst address is zero");
 688                        return -EINVAL;
 689                }
 690
 691                if (!ttl) {
 692                        OVS_NLERR(log, "IP tunnel TTL not specified.");
 693                        return -EINVAL;
 694                }
 695        }
 696
 697        return opts_type;
 698}
 699
 700static int vxlan_opt_to_nlattr(struct sk_buff *skb,
 701                               const void *tun_opts, int swkey_tun_opts_len)
 702{
 703        const struct vxlan_metadata *opts = tun_opts;
 704        struct nlattr *nla;
 705
 706        nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
 707        if (!nla)
 708                return -EMSGSIZE;
 709
 710        if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
 711                return -EMSGSIZE;
 712
 713        nla_nest_end(skb, nla);
 714        return 0;
 715}
 716
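/* Emit @output and any tunnel options as OVS_TUNNEL_KEY_ATTR_* attributes.
 * The caller provides the enclosing nest, if any; see ip_tun_to_nlattr().
 */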
 717static int __ip_tun_to_nlattr(struct sk_buff *skb,
 718                              const struct ip_tunnel_key *output,
 719                              const void *tun_opts, int swkey_tun_opts_len,
 720                              unsigned short tun_proto)
 721{
 722        if (output->tun_flags & TUNNEL_KEY &&
 723            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
 724                         OVS_TUNNEL_KEY_ATTR_PAD))
 725                return -EMSGSIZE;
 726        switch (tun_proto) {
 727        case AF_INET:
 728                if (output->u.ipv4.src &&
 729                    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
 730                                    output->u.ipv4.src))
 731                        return -EMSGSIZE;
 732                if (output->u.ipv4.dst &&
 733                    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
 734                                    output->u.ipv4.dst))
 735                        return -EMSGSIZE;
 736                break;
 737        case AF_INET6:
 738                if (!ipv6_addr_any(&output->u.ipv6.src) &&
 739                    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
 740                                     &output->u.ipv6.src))
 741                        return -EMSGSIZE;
 742                if (!ipv6_addr_any(&output->u.ipv6.dst) &&
 743                    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
 744                                     &output->u.ipv6.dst))
 745                        return -EMSGSIZE;
 746                break;
 747        }
 748        if (output->tos &&
 749            nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
 750                return -EMSGSIZE;
 751        if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
 752                return -EMSGSIZE;
 753        if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
 754            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 755                return -EMSGSIZE;
 756        if ((output->tun_flags & TUNNEL_CSUM) &&
 757            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 758                return -EMSGSIZE;
 759        if (output->tp_src &&
 760            nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
 761                return -EMSGSIZE;
 762        if (output->tp_dst &&
 763            nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
 764                return -EMSGSIZE;
 765        if ((output->tun_flags & TUNNEL_OAM) &&
 766            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 767                return -EMSGSIZE;
 768        if (swkey_tun_opts_len) {
 769                if (output->tun_flags & TUNNEL_GENEVE_OPT &&
 770                    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
 771                            swkey_tun_opts_len, tun_opts))
 772                        return -EMSGSIZE;
 773                else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
 774                         vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
 775                        return -EMSGSIZE;
 776        }
 777
 778        return 0;
 779}
 780
 781static int ip_tun_to_nlattr(struct sk_buff *skb,
 782                            const struct ip_tunnel_key *output,
 783                            const void *tun_opts, int swkey_tun_opts_len,
 784                            unsigned short tun_proto)
 785{
 786        struct nlattr *nla;
 787        int err;
 788
 789        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
 790        if (!nla)
 791                return -EMSGSIZE;
 792
 793        err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
 794                                 tun_proto);
 795        if (err)
 796                return err;
 797
 798        nla_nest_end(skb, nla);
 799        return 0;
 800}
 801
 802int ovs_nla_put_tunnel_info(struct sk_buff *skb,
 803                            struct ip_tunnel_info *tun_info)
 804{
 805        return __ip_tun_to_nlattr(skb, &tun_info->key,
 806                                  ip_tunnel_info_opts(tun_info),
 807                                  tun_info->options_len,
 808                                  ip_tunnel_info_af(tun_info));
 809}
 810
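/* Store the 802.1Q TPID and TCI from the attributes into the outer VLAN
 * slot of the flow key or mask, or into the inner (customer) VLAN slot
 * when @inner is set.
 */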
 811static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
 812                                    const struct nlattr *a[],
 813                                    bool is_mask, bool inner)
 814{
 815        __be16 tci = 0;
 816        __be16 tpid = 0;
 817
 818        if (a[OVS_KEY_ATTR_VLAN])
 819                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 820
 821        if (a[OVS_KEY_ATTR_ETHERTYPE])
 822                tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
 823
 824        if (likely(!inner)) {
 825                SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
 826                SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
 827        } else {
 828                SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
 829                SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
 830        }
 831        return 0;
 832}
 833
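/* Key-side VLAN check: returns 0 if the frame is not VLAN tagged, -EINVAL
 * if the VLAN attributes are inconsistent, and 1 if the encapsulated
 * attributes should be parsed.
 */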
 834static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
 835                                      u64 key_attrs, bool inner,
 836                                      const struct nlattr **a, bool log)
 837{
 838        __be16 tci = 0;
 839
 840        if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
 841              (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
 842               eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
 843                /* Not a VLAN. */
 844                return 0;
 845        }
 846
 847        if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
 848              (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
 849                OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
 850                return -EINVAL;
 851        }
 852
 853        if (a[OVS_KEY_ATTR_VLAN])
 854                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 855
 856        if (!(tci & htons(VLAN_TAG_PRESENT))) {
 857                if (tci) {
 858                        OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
 859                                  (inner) ? "C-VLAN" : "VLAN");
 860                        return -EINVAL;
 861                } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
 862                        /* Corner case for truncated VLAN header. */
 863                        OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
 864                                  (inner) ? "C-VLAN" : "VLAN");
 865                        return -EINVAL;
 866                }
 867        }
 868
 869        return 1;
 870}
 871
 872static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
 873                                           u64 key_attrs, bool inner,
 874                                           const struct nlattr **a, bool log)
 875{
 876        __be16 tci = 0;
 877        __be16 tpid = 0;
 878        bool encap_valid = !!(match->key->eth.vlan.tci &
 879                              htons(VLAN_TAG_PRESENT));
 880        bool i_encap_valid = !!(match->key->eth.cvlan.tci &
 881                                htons(VLAN_TAG_PRESENT));
 882
 883        if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
 884                /* Not a VLAN. */
 885                return 0;
 886        }
 887
 888        if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
 889                OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
 890                          (inner) ? "C-VLAN" : "VLAN");
 891                return -EINVAL;
 892        }
 893
 894        if (a[OVS_KEY_ATTR_VLAN])
 895                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 896
 897        if (a[OVS_KEY_ATTR_ETHERTYPE])
 898                tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
 899
 900        if (tpid != htons(0xffff)) {
 901                OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
 902                          (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
 903                return -EINVAL;
 904        }
 905        if (!(tci & htons(VLAN_TAG_PRESENT))) {
 906                OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
 907                          (inner) ? "C-VLAN" : "VLAN");
 908                return -EINVAL;
 909        }
 910
 911        return 1;
 912}
 913
 914static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
 915                                     u64 *key_attrs, bool inner,
 916                                     const struct nlattr **a, bool is_mask,
 917                                     bool log)
 918{
 919        int err;
 920        const struct nlattr *encap;
 921
 922        if (!is_mask)
 923                err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
 924                                                 a, log);
 925        else
 926                err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
 927                                                      a, log);
 928        if (err <= 0)
 929                return err;
 930
 931        err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
 932        if (err)
 933                return err;
 934
 935        *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
 936        *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
 937        *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
 938
 939        encap = a[OVS_KEY_ATTR_ENCAP];
 940
 941        if (!is_mask)
 942                err = parse_flow_nlattrs(encap, a, key_attrs, log);
 943        else
 944                err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
 945
 946        return err;
 947}
 948
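/* Parse the outer VLAN attributes and, if the outer tag is present, the
 * inner (customer) VLAN carried in the nested encap attribute.
 */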
 949static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
 950                                   u64 *key_attrs, const struct nlattr **a,
 951                                   bool is_mask, bool log)
 952{
 953        int err;
 954        bool encap_valid = false;
 955
 956        err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
 957                                        is_mask, log);
 958        if (err)
 959                return err;
 960
 961        encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
 962        if (encap_valid) {
 963                err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
 964                                                is_mask, log);
 965                if (err)
 966                        return err;
 967        }
 968
 969        return 0;
 970}
 971
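/* Extract the packet-metadata attributes (datapath hash, recirc id,
 * priority, input port, skb mark, tunnel key and conntrack
 * state/zone/mark/labels) into @match, clearing each handled bit in *attrs.
 */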
 972static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 973                                 u64 *attrs, const struct nlattr **a,
 974                                 bool is_mask, bool log)
 975{
 976        if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
 977                u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
 978
 979                SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
 980                *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
 981        }
 982
 983        if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
 984                u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
 985
 986                SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
 987                *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
 988        }
 989
 990        if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
 991                SW_FLOW_KEY_PUT(match, phy.priority,
 992                          nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
 993                *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
 994        }
 995
 996        if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
 997                u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
 998
 999                if (is_mask) {
1000                        in_port = 0xffffffff; /* Always exact match in_port. */
1001                } else if (in_port >= DP_MAX_PORTS) {
1002                        OVS_NLERR(log, "Port %d exceeds max allowable %d",
1003                                  in_port, DP_MAX_PORTS);
1004                        return -EINVAL;
1005                }
1006
1007                SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1008                *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1009        } else if (!is_mask) {
1010                SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1011        }
1012
1013        if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1014                uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1015
1016                SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1017                *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1018        }
1019        if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1020                if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1021                                       is_mask, log) < 0)
1022                        return -EINVAL;
1023                *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1024        }
1025
1026        if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1027            ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1028                u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1029
1030                if (ct_state & ~CT_SUPPORTED_MASK) {
1031                        OVS_NLERR(log, "ct_state flags %08x unsupported",
1032                                  ct_state);
1033                        return -EINVAL;
1034                }
1035
1036                SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
1037                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1038        }
1039        if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1040            ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1041                u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1042
1043                SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
1044                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1045        }
1046        if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1047            ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1048                u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1049
1050                SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1051                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1052        }
1053        if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1054            ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1055                const struct ovs_key_ct_labels *cl;
1056
1057                cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1058                SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1059                                   sizeof(*cl), is_mask);
1060                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1061        }
1062        return 0;
1063}
1064
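/* Build the flow key (or mask) from the parsed attributes: metadata first,
 * then Ethernet, EtherType and the L3/L4 protocol fields.  Any attribute
 * bit still set afterwards is an error.
 */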
1065static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1066                                u64 attrs, const struct nlattr **a,
1067                                bool is_mask, bool log)
1068{
1069        int err;
1070
1071        err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
1072        if (err)
1073                return err;
1074
1075        if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1076                const struct ovs_key_ethernet *eth_key;
1077
1078                eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1079                SW_FLOW_KEY_MEMCPY(match, eth.src,
1080                                eth_key->eth_src, ETH_ALEN, is_mask);
1081                SW_FLOW_KEY_MEMCPY(match, eth.dst,
1082                                eth_key->eth_dst, ETH_ALEN, is_mask);
1083                attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1084        }
1085
1086        if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1087                /* VLAN attribute is always parsed before getting here since it
1088                 * may occur multiple times.
1089                 */
1090                OVS_NLERR(log, "VLAN attribute unexpected.");
1091                return -EINVAL;
1092        }
1093
1094        if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1095                __be16 eth_type;
1096
1097                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1098                if (is_mask) {
1099                        /* Always exact match EtherType. */
1100                        eth_type = htons(0xffff);
1101                } else if (!eth_proto_is_802_3(eth_type)) {
1102                        OVS_NLERR(log, "EtherType %x is less than min %x",
1103                                  ntohs(eth_type), ETH_P_802_3_MIN);
1104                        return -EINVAL;
1105                }
1106
1107                SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1108                attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1109        } else if (!is_mask) {
1110                SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1111        }
1112
1113        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1114                const struct ovs_key_ipv4 *ipv4_key;
1115
1116                ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1117                if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1118                        OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
1119                                  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1120                        return -EINVAL;
1121                }
1122                SW_FLOW_KEY_PUT(match, ip.proto,
1123                                ipv4_key->ipv4_proto, is_mask);
1124                SW_FLOW_KEY_PUT(match, ip.tos,
1125                                ipv4_key->ipv4_tos, is_mask);
1126                SW_FLOW_KEY_PUT(match, ip.ttl,
1127                                ipv4_key->ipv4_ttl, is_mask);
1128                SW_FLOW_KEY_PUT(match, ip.frag,
1129                                ipv4_key->ipv4_frag, is_mask);
1130                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1131                                ipv4_key->ipv4_src, is_mask);
1132                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1133                                ipv4_key->ipv4_dst, is_mask);
1134                attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1135        }
1136
1137        if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1138                const struct ovs_key_ipv6 *ipv6_key;
1139
1140                ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1141                if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1142                        OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
1143                                  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1144                        return -EINVAL;
1145                }
1146
1147                if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
1148                        OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
1149                                  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
1150                        return -EINVAL;
1151                }
1152
1153                SW_FLOW_KEY_PUT(match, ipv6.label,
1154                                ipv6_key->ipv6_label, is_mask);
1155                SW_FLOW_KEY_PUT(match, ip.proto,
1156                                ipv6_key->ipv6_proto, is_mask);
1157                SW_FLOW_KEY_PUT(match, ip.tos,
1158                                ipv6_key->ipv6_tclass, is_mask);
1159                SW_FLOW_KEY_PUT(match, ip.ttl,
1160                                ipv6_key->ipv6_hlimit, is_mask);
1161                SW_FLOW_KEY_PUT(match, ip.frag,
1162                                ipv6_key->ipv6_frag, is_mask);
1163                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1164                                ipv6_key->ipv6_src,
1165                                sizeof(match->key->ipv6.addr.src),
1166                                is_mask);
1167                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1168                                ipv6_key->ipv6_dst,
1169                                sizeof(match->key->ipv6.addr.dst),
1170                                is_mask);
1171
1172                attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1173        }
1174
1175        if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1176                const struct ovs_key_arp *arp_key;
1177
1178                arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1179                if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1180                        OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
1181                                  arp_key->arp_op);
1182                        return -EINVAL;
1183                }
1184
1185                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1186                                arp_key->arp_sip, is_mask);
1187                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1188                        arp_key->arp_tip, is_mask);
1189                SW_FLOW_KEY_PUT(match, ip.proto,
1190                                ntohs(arp_key->arp_op), is_mask);
1191                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1192                                arp_key->arp_sha, ETH_ALEN, is_mask);
1193                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1194                                arp_key->arp_tha, ETH_ALEN, is_mask);
1195
1196                attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1197        }
1198
1199        if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1200                const struct ovs_key_mpls *mpls_key;
1201
1202                mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
1203                SW_FLOW_KEY_PUT(match, mpls.top_lse,
1204                                mpls_key->mpls_lse, is_mask);
1205
1206                attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
 1207        }
1208
1209        if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1210                const struct ovs_key_tcp *tcp_key;
1211
1212                tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1213                SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
1214                SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
1215                attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1216        }
1217
1218        if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
1219                SW_FLOW_KEY_PUT(match, tp.flags,
1220                                nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
1221                                is_mask);
1222                attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
1223        }
1224
1225        if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1226                const struct ovs_key_udp *udp_key;
1227
1228                udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1229                SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
1230                SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
1231                attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1232        }
1233
1234        if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1235                const struct ovs_key_sctp *sctp_key;
1236
1237                sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1238                SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
1239                SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
1240                attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1241        }
1242
1243        if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1244                const struct ovs_key_icmp *icmp_key;
1245
1246                icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1247                SW_FLOW_KEY_PUT(match, tp.src,
1248                                htons(icmp_key->icmp_type), is_mask);
1249                SW_FLOW_KEY_PUT(match, tp.dst,
1250                                htons(icmp_key->icmp_code), is_mask);
1251                attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1252        }
1253
1254        if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1255                const struct ovs_key_icmpv6 *icmpv6_key;
1256
1257                icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1258                SW_FLOW_KEY_PUT(match, tp.src,
1259                                htons(icmpv6_key->icmpv6_type), is_mask);
1260                SW_FLOW_KEY_PUT(match, tp.dst,
1261                                htons(icmpv6_key->icmpv6_code), is_mask);
1262                attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1263        }
1264
1265        if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1266                const struct ovs_key_nd *nd_key;
1267
1268                nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1269                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1270                        nd_key->nd_target,
1271                        sizeof(match->key->ipv6.nd.target),
1272                        is_mask);
1273                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1274                        nd_key->nd_sll, ETH_ALEN, is_mask);
1275                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1276                                nd_key->nd_tll, ETH_ALEN, is_mask);
1277                attrs &= ~(1 << OVS_KEY_ATTR_ND);
1278        }
1279
1280        if (attrs != 0) {
1281                OVS_NLERR(log, "Unknown key attributes %llx",
1282                          (unsigned long long)attrs);
1283                return -EINVAL;
1284        }
1285
1286        return 0;
1287}
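/* Illustrative note (not part of the original source): each attribute handled
 * above clears its bit in 'attrs', so any non-zero remainder at the final
 * check pinpoints the unhandled OVS_KEY_ATTR_* types; e.g. a leftover value
 * of 0x4000 means attribute type 14 was present but not consumed here.
 */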
1288
1289static void nlattr_set(struct nlattr *attr, u8 val,
1290                       const struct ovs_len_tbl *tbl)
1291{
1292        struct nlattr *nla;
1293        int rem;
1294
1295        /* The nlattr stream should already have been validated */
1296        nla_for_each_nested(nla, attr, rem) {
1297                if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
1298                        if (tbl[nla_type(nla)].next)
1299                                tbl = tbl[nla_type(nla)].next;
1300                        nlattr_set(nla, val, tbl);
1301                } else {
1302                        memset(nla_data(nla), val, nla_len(nla));
1303                }
1304
1305                if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1306                        *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1307        }
1308}
1309
1310static void mask_set_nlattr(struct nlattr *attr, u8 val)
1311{
1312        nlattr_set(attr, val, ovs_key_lens);
1313}
1314
1315/**
1316 * ovs_nla_get_match - parses Netlink attributes into a flow key and
1317 * mask. If 'nla_mask' is NULL, the flow is treated as an exact match
1318 * flow. Otherwise, it is treated as a wildcarded flow, except that the
1319 * mask does not include any don't-care bits.
1320 * @net: Used to determine per-namespace field support.
1321 * @match: receives the extracted flow match information.
1322 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1323 * sequence. The fields should be those of the packet that triggered the
1324 * creation of this flow.
1325 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1326 * attribute sequence that specifies the mask field of the wildcarded flow.
1327 * @log: Boolean to allow kernel error logging.  Normally true, but when
1328 * probing for feature compatibility this should be passed in as false to
1329 * suppress unnecessary error logging.
1330 */
1331int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1332                      const struct nlattr *nla_key,
1333                      const struct nlattr *nla_mask,
1334                      bool log)
1335{
1336        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1337        struct nlattr *newmask = NULL;
1338        u64 key_attrs = 0;
1339        u64 mask_attrs = 0;
1340        int err;
1341
1342        err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1343        if (err)
1344                return err;
1345
1346        err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1347        if (err)
1348                return err;
1349
1350        err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1351        if (err)
1352                return err;
1353
1354        if (match->mask) {
1355                if (!nla_mask) {
1356                        /* Create an exact match mask. We need to set to 0xff
1357                         * all the 'match->mask' fields that have been touched
1358                         * in 'match->key'. We cannot simply memset
1359                         * 'match->mask', because padding bytes and fields not
1360                         * specified in 'match->key' should be left to 0.
1361                         * Instead, we use a stream of netlink attributes,
1362                         * copied from 'key' and set to 0xff.
1363                         * ovs_key_from_nlattrs() will take care of filling
1364                         * 'match->mask' appropriately.
1365                         */
1366                        newmask = kmemdup(nla_key,
1367                                          nla_total_size(nla_len(nla_key)),
1368                                          GFP_KERNEL);
1369                        if (!newmask)
1370                                return -ENOMEM;
1371
1372                        mask_set_nlattr(newmask, 0xff);
1373
1374                        /* Userspace does not send tunnel attributes that
1375                         * are 0, but we should still not wildcard them.
1376                         */
1377                        if (match->key->tun_proto)
1378                                SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1379                                                         0xff, true);
1380
1381                        nla_mask = newmask;
1382                }
1383
1384                err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1385                if (err)
1386                        goto free_newmask;
1387
1388                /* Always match on tci. */
1389                SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1390                SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1391
1392                err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1393                if (err)
1394                        goto free_newmask;
1395
1396                err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1397                                           log);
1398                if (err)
1399                        goto free_newmask;
1400        }
1401
1402        if (!match_validate(match, key_attrs, mask_attrs, log))
1403                err = -EINVAL;
1404
1405free_newmask:
1406        kfree(newmask);
1407        return err;
1408}
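/* Illustrative sketch (not part of the original source): assuming 'match' was
 * set up with a mask buffer via ovs_match_init(), a hypothetical caller that
 * wants an exact-match flow can pass a NULL mask attribute;
 * ovs_nla_get_match() then duplicates the key attribute stream, fills it with
 * 0xff via mask_set_nlattr() and parses that copy as the mask.
 */
#if 0
static int example_exact_match(struct net *net, struct sw_flow_match *match,
                               const struct nlattr *nla_key, bool log)
{
        /* NULL nla_mask: every field present in 'nla_key' becomes exact. */
        return ovs_nla_get_match(net, match, nla_key, NULL, log);
}
#endif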
1409
1410static size_t get_ufid_len(const struct nlattr *attr, bool log)
1411{
1412        size_t len;
1413
1414        if (!attr)
1415                return 0;
1416
1417        len = nla_len(attr);
1418        if (len < 1 || len > MAX_UFID_LENGTH) {
1419                OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1420                          nla_len(attr), MAX_UFID_LENGTH);
1421                return 0;
1422        }
1423
1424        return len;
1425}
1426
1427/* Initializes 'sfid' from 'attr', returning true if 'attr' contains a valid
1428 * UFID, and false otherwise.
1429 */
1430bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1431                      bool log)
1432{
1433        sfid->ufid_len = get_ufid_len(attr, log);
1434        if (sfid->ufid_len)
1435                memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1436
1437        return sfid->ufid_len;
1438}
1439
1440int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1441                           const struct sw_flow_key *key, bool log)
1442{
1443        struct sw_flow_key *new_key;
1444
1445        if (ovs_nla_get_ufid(sfid, ufid, log))
1446                return 0;
1447
1448        /* If UFID was not provided, use unmasked key. */
1449        new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1450        if (!new_key)
1451                return -ENOMEM;
1452        memcpy(new_key, key, sizeof(*key));
1453        sfid->unmasked_key = new_key;
1454
1455        return 0;
1456}
1457
1458u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1459{
1460        return attr ? nla_get_u32(attr) : 0;
1461}
1462
1463/**
1464 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1465 * @net: Used to determine per-namespace field support.
1466 * @attr: Netlink attribute holding a nested %OVS_KEY_ATTR_* attribute sequence.
1467 * @key: Receives the extracted in_port, priority, tun_key and skb_mark.
1468 * @log: Boolean to allow kernel error logging.  Normally true, but when
1469 * probing for feature compatibility this should be passed in as false to
1470 * suppress unnecessary error logging.
1471 *
1472 * This parses a series of Netlink attributes that form a flow key, which must
1473 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1474 * get the metadata, that is, the parts of the flow key that cannot be
1475 * extracted from the packet itself.
1476 */
1477
1478int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
1479                              struct sw_flow_key *key,
1480                              bool log)
1481{
1482        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1483        struct sw_flow_match match;
1484        u64 attrs = 0;
1485        int err;
1486
1487        err = parse_flow_nlattrs(attr, a, &attrs, log);
1488        if (err)
1489                return -EINVAL;
1490
1491        memset(&match, 0, sizeof(match));
1492        match.key = key;
1493
1494        memset(&key->ct, 0, sizeof(key->ct));
1495        key->phy.in_port = DP_MAX_PORTS;
1496
1497        return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1498}
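/* Illustrative sketch (not part of the original source): a hypothetical
 * caller that only needs the metadata half of a flow key, e.g. before
 * extracting the rest from the packet itself, might use the helper above
 * like this, with logging suppressed as is done when probing for features.
 */
#if 0
static int example_metadata_only(struct net *net, const struct nlattr *attr,
                                 struct sw_flow_key *key)
{
        return ovs_nla_get_flow_metadata(net, attr, key, false);
}
#endif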
1499
1500static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1501                            bool is_mask)
1502{
1503        __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1504
1505        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1506            nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1507                return -EMSGSIZE;
1508        return 0;
1509}
1510
1511static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1512                             const struct sw_flow_key *output, bool is_mask,
1513                             struct sk_buff *skb)
1514{
1515        struct ovs_key_ethernet *eth_key;
1516        struct nlattr *nla;
1517        struct nlattr *encap = NULL;
1518        struct nlattr *in_encap = NULL;
1519
1520        if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1521                goto nla_put_failure;
1522
1523        if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1524                goto nla_put_failure;
1525
1526        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1527                goto nla_put_failure;
1528
1529        if (swkey->tun_proto || is_mask) {
1530                const void *opts = NULL;
1531
1532                if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1533                        opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1534
1535                if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1536                                     swkey->tun_opts_len, swkey->tun_proto))
1537                        goto nla_put_failure;
1538        }
1539
1540        if (swkey->phy.in_port == DP_MAX_PORTS) {
1541                if (is_mask && (output->phy.in_port == 0xffff))
1542                        if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1543                                goto nla_put_failure;
1544        } else {
1545                u16 upper_u16;
1546                upper_u16 = !is_mask ? 0 : 0xffff;
1547
1548                if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1549                                (upper_u16 << 16) | output->phy.in_port))
1550                        goto nla_put_failure;
1551        }
1552
1553        if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1554                goto nla_put_failure;
1555
1556        if (ovs_ct_put_key(output, skb))
1557                goto nla_put_failure;
1558
1559        nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1560        if (!nla)
1561                goto nla_put_failure;
1562
1563        eth_key = nla_data(nla);
1564        ether_addr_copy(eth_key->eth_src, output->eth.src);
1565        ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1566
1567        if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
1568                if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
1569                        goto nla_put_failure;
1570                encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1571                if (!swkey->eth.vlan.tci)
1572                        goto unencap;
1573
1574                if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
1575                        if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
1576                                goto nla_put_failure;
1577                        in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1578                        if (!swkey->eth.cvlan.tci)
1579                                goto unencap;
1580                }
1581        }
1582
1583        if (swkey->eth.type == htons(ETH_P_802_2)) {
1584                /*
1585                 * Ethertype 802.2 is represented in netlink by omitting
1586                 * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute and by
1587                 * using 0xffff in the mask attribute.  The ethertype can
1588                 * also be wildcarded.
1589                 */
1590                if (is_mask && output->eth.type)
1591                        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1592                                                output->eth.type))
1593                                goto nla_put_failure;
1594                goto unencap;
1595        }
1596
1597        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1598                goto nla_put_failure;
1599
1600        if (eth_type_vlan(swkey->eth.type)) {
1601                /* There are 3 VLAN tags; we don't know anything about the rest
1602                 * of the packet, so truncate here.
1603                 */
1604                WARN_ON_ONCE(!(encap && in_encap));
1605                goto unencap;
1606        }
1607
1608        if (swkey->eth.type == htons(ETH_P_IP)) {
1609                struct ovs_key_ipv4 *ipv4_key;
1610
1611                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
1612                if (!nla)
1613                        goto nla_put_failure;
1614                ipv4_key = nla_data(nla);
1615                ipv4_key->ipv4_src = output->ipv4.addr.src;
1616                ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1617                ipv4_key->ipv4_proto = output->ip.proto;
1618                ipv4_key->ipv4_tos = output->ip.tos;
1619                ipv4_key->ipv4_ttl = output->ip.ttl;
1620                ipv4_key->ipv4_frag = output->ip.frag;
1621        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1622                struct ovs_key_ipv6 *ipv6_key;
1623
1624                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
1625                if (!nla)
1626                        goto nla_put_failure;
1627                ipv6_key = nla_data(nla);
1628                memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1629                                sizeof(ipv6_key->ipv6_src));
1630                memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1631                                sizeof(ipv6_key->ipv6_dst));
1632                ipv6_key->ipv6_label = output->ipv6.label;
1633                ipv6_key->ipv6_proto = output->ip.proto;
1634                ipv6_key->ipv6_tclass = output->ip.tos;
1635                ipv6_key->ipv6_hlimit = output->ip.ttl;
1636                ipv6_key->ipv6_frag = output->ip.frag;
1637        } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1638                   swkey->eth.type == htons(ETH_P_RARP)) {
1639                struct ovs_key_arp *arp_key;
1640
1641                nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
1642                if (!nla)
1643                        goto nla_put_failure;
1644                arp_key = nla_data(nla);
1645                memset(arp_key, 0, sizeof(struct ovs_key_arp));
1646                arp_key->arp_sip = output->ipv4.addr.src;
1647                arp_key->arp_tip = output->ipv4.addr.dst;
1648                arp_key->arp_op = htons(output->ip.proto);
1649                ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
1650                ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
1651        } else if (eth_p_mpls(swkey->eth.type)) {
1652                struct ovs_key_mpls *mpls_key;
1653
1654                nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
1655                if (!nla)
1656                        goto nla_put_failure;
1657                mpls_key = nla_data(nla);
1658                mpls_key->mpls_lse = output->mpls.top_lse;
1659        }
1660
1661        if ((swkey->eth.type == htons(ETH_P_IP) ||
1662             swkey->eth.type == htons(ETH_P_IPV6)) &&
1663             swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1664
1665                if (swkey->ip.proto == IPPROTO_TCP) {
1666                        struct ovs_key_tcp *tcp_key;
1667
1668                        nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
1669                        if (!nla)
1670                                goto nla_put_failure;
1671                        tcp_key = nla_data(nla);
1672                        tcp_key->tcp_src = output->tp.src;
1673                        tcp_key->tcp_dst = output->tp.dst;
1674                        if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
1675                                         output->tp.flags))
1676                                goto nla_put_failure;
1677                } else if (swkey->ip.proto == IPPROTO_UDP) {
1678                        struct ovs_key_udp *udp_key;
1679
1680                        nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1681                        if (!nla)
1682                                goto nla_put_failure;
1683                        udp_key = nla_data(nla);
1684                        udp_key->udp_src = output->tp.src;
1685                        udp_key->udp_dst = output->tp.dst;
1686                } else if (swkey->ip.proto == IPPROTO_SCTP) {
1687                        struct ovs_key_sctp *sctp_key;
1688
1689                        nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1690                        if (!nla)
1691                                goto nla_put_failure;
1692                        sctp_key = nla_data(nla);
1693                        sctp_key->sctp_src = output->tp.src;
1694                        sctp_key->sctp_dst = output->tp.dst;
1695                } else if (swkey->eth.type == htons(ETH_P_IP) &&
1696                           swkey->ip.proto == IPPROTO_ICMP) {
1697                        struct ovs_key_icmp *icmp_key;
1698
1699                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1700                        if (!nla)
1701                                goto nla_put_failure;
1702                        icmp_key = nla_data(nla);
1703                        icmp_key->icmp_type = ntohs(output->tp.src);
1704                        icmp_key->icmp_code = ntohs(output->tp.dst);
1705                } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1706                           swkey->ip.proto == IPPROTO_ICMPV6) {
1707                        struct ovs_key_icmpv6 *icmpv6_key;
1708
1709                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1710                                                sizeof(*icmpv6_key));
1711                        if (!nla)
1712                                goto nla_put_failure;
1713                        icmpv6_key = nla_data(nla);
1714                        icmpv6_key->icmpv6_type = ntohs(output->tp.src);
1715                        icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
1716
1717                        if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1718                            icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1719                                struct ovs_key_nd *nd_key;
1720
1721                                nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1722                                if (!nla)
1723                                        goto nla_put_failure;
1724                                nd_key = nla_data(nla);
1725                                memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1726                                                        sizeof(nd_key->nd_target));
1727                                ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
1728                                ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
1729                        }
1730                }
1731        }
1732
1733unencap:
1734        if (in_encap)
1735                nla_nest_end(skb, in_encap);
1736        if (encap)
1737                nla_nest_end(skb, encap);
1738
1739        return 0;
1740
1741nla_put_failure:
1742        return -EMSGSIZE;
1743}
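/* Illustrative note (not part of the original source): for a double-tagged
 * (QinQ) packet, __ovs_nla_put_key() above emits the VLAN information as
 * nested encapsulations, roughly:
 *
 *   OVS_KEY_ATTR_ETHERTYPE = outer TPID (e.g. 0x88a8)
 *   OVS_KEY_ATTR_VLAN      = outer TCI
 *   OVS_KEY_ATTR_ENCAP {
 *       OVS_KEY_ATTR_ETHERTYPE = inner TPID (e.g. 0x8100)
 *       OVS_KEY_ATTR_VLAN      = inner TCI
 *       OVS_KEY_ATTR_ENCAP {
 *           OVS_KEY_ATTR_ETHERTYPE = payload ethertype, then L3/L4 attributes
 *       }
 *   }
 */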
1744
1745int ovs_nla_put_key(const struct sw_flow_key *swkey,
1746                    const struct sw_flow_key *output, int attr, bool is_mask,
1747                    struct sk_buff *skb)
1748{
1749        int err;
1750        struct nlattr *nla;
1751
1752        nla = nla_nest_start(skb, attr);
1753        if (!nla)
1754                return -EMSGSIZE;
1755        err = __ovs_nla_put_key(swkey, output, is_mask, skb);
1756        if (err)
1757                return err;
1758        nla_nest_end(skb, nla);
1759
1760        return 0;
1761}
1762
1763/* Called with ovs_mutex or RCU read lock. */
1764int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
1765{
1766        if (ovs_identifier_is_ufid(&flow->id))
1767                return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
1768                               flow->id.ufid);
1769
1770        return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
1771                               OVS_FLOW_ATTR_KEY, false, skb);
1772}
1773
1774/* Called with ovs_mutex or RCU read lock. */
1775int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
1776{
1777        return ovs_nla_put_key(&flow->key, &flow->key,
1778                                OVS_FLOW_ATTR_KEY, false, skb);
1779}
1780
1781/* Called with ovs_mutex or RCU read lock. */
1782int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
1783{
1784        return ovs_nla_put_key(&flow->key, &flow->mask->key,
1785                                OVS_FLOW_ATTR_MASK, true, skb);
1786}
1787
1788#define MAX_ACTIONS_BUFSIZE     (32 * 1024)
1789
1790static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
1791{
1792        struct sw_flow_actions *sfa;
1793
1794        if (size > MAX_ACTIONS_BUFSIZE) {
1795                OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
1796                return ERR_PTR(-EINVAL);
1797        }
1798
1799        sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1800        if (!sfa)
1801                return ERR_PTR(-ENOMEM);
1802
1803        sfa->actions_len = 0;
1804        return sfa;
1805}
1806
1807static void ovs_nla_free_set_action(const struct nlattr *a)
1808{
1809        const struct nlattr *ovs_key = nla_data(a);
1810        struct ovs_tunnel_info *ovs_tun;
1811
1812        switch (nla_type(ovs_key)) {
1813        case OVS_KEY_ATTR_TUNNEL_INFO:
1814                ovs_tun = nla_data(ovs_key);
1815                dst_release((struct dst_entry *)ovs_tun->tun_dst);
1816                break;
1817        }
1818}
1819
1820void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1821{
1822        const struct nlattr *a;
1823        int rem;
1824
1825        if (!sf_acts)
1826                return;
1827
1828        nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
1829                switch (nla_type(a)) {
1830                case OVS_ACTION_ATTR_SET:
1831                        ovs_nla_free_set_action(a);
1832                        break;
1833                case OVS_ACTION_ATTR_CT:
1834                        ovs_ct_free_action(a);
1835                        break;
1836                }
1837        }
1838
1839        kfree(sf_acts);
1840}
1841
1842static void __ovs_nla_free_flow_actions(struct rcu_head *head)
1843{
1844        ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
1845}
1846
1847/* Schedules 'sf_acts' to be freed after the next RCU grace period.
1848 * The caller must hold rcu_read_lock for this to be sensible. */
1849void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
1850{
1851        call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
1852}
1853
1854static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1855                                       int attr_len, bool log)
1856{
1857
1858        struct sw_flow_actions *acts;
1859        int new_acts_size;
1860        int req_size = NLA_ALIGN(attr_len);
1861        int next_offset = offsetof(struct sw_flow_actions, actions) +
1862                                        (*sfa)->actions_len;
1863
1864        if (req_size <= (ksize(*sfa) - next_offset))
1865                goto out;
1866
1867        new_acts_size = ksize(*sfa) * 2;
1868
1869        if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
1870                if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
1871                        return ERR_PTR(-EMSGSIZE);
1872                new_acts_size = MAX_ACTIONS_BUFSIZE;
1873        }
1874
1875        acts = nla_alloc_flow_actions(new_acts_size, log);
1876        if (IS_ERR(acts))
1877                return (void *)acts;
1878
1879        memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
1880        acts->actions_len = (*sfa)->actions_len;
1881        acts->orig_len = (*sfa)->orig_len;
1882        kfree(*sfa);
1883        *sfa = acts;
1884
1885out:
1886        (*sfa)->actions_len += req_size;
1887        return (struct nlattr *)((unsigned char *)(*sfa) + next_offset);
1888}
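/* Illustrative note (not part of the original source): reserve_sfa_size()
 * above grows the actions buffer geometrically -- if the NLA_ALIGNed request
 * no longer fits in the slab allocation reported by ksize(), the buffer is
 * reallocated at twice its current ksize(), clamped to MAX_ACTIONS_BUFSIZE,
 * and the existing actions are copied across before the new attribute space
 * is handed back to the caller.
 */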
1889
1890static struct nlattr *__add_action(struct sw_flow_actions **sfa,
1891                                   int attrtype, void *data, int len, bool log)
1892{
1893        struct nlattr *a;
1894
1895        a = reserve_sfa_size(sfa, nla_attr_size(len), log);
1896        if (IS_ERR(a))
1897                return a;
1898
1899        a->nla_type = attrtype;
1900        a->nla_len = nla_attr_size(len);
1901
1902        if (data)
1903                memcpy(nla_data(a), data, len);
1904        memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
1905
1906        return a;
1907}
1908
1909int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
1910                       int len, bool log)
1911{
1912        struct nlattr *a;
1913
1914        a = __add_action(sfa, attrtype, data, len, log);
1915
1916        return PTR_ERR_OR_ZERO(a);
1917}
1918
1919static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1920                                          int attrtype, bool log)
1921{
1922        int used = (*sfa)->actions_len;
1923        int err;
1924
1925        err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
1926        if (err)
1927                return err;
1928
1929        return used;
1930}
1931
1932static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1933                                         int st_offset)
1934{
1935        struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
1936                                                               st_offset);
1937
1938        a->nla_len = sfa->actions_len - st_offset;
1939}
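/* Illustrative sketch (not part of the original source): nested actions are
 * built with a start/fill/end pattern -- reserve an empty header attribute,
 * append the children, then patch the header's nla_len.  This mirrors what
 * validate_and_copy_sample() below does; the probability value here is an
 * arbitrary example (roughly one packet in two).
 */
#if 0
static int example_nested_action(struct sw_flow_actions **sfa, bool log)
{
        u32 probability = 0x80000000;
        int start, err;

        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
        if (start < 0)
                return start;

        err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
                                 &probability, sizeof(probability), log);
        if (err)
                return err;

        add_nested_action_end(*sfa, start);
        return 0;
}
#endif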
1940
1941static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
1942                                  const struct sw_flow_key *key,
1943                                  int depth, struct sw_flow_actions **sfa,
1944                                  __be16 eth_type, __be16 vlan_tci, bool log);
1945
1946static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
1947                                    const struct sw_flow_key *key, int depth,
1948                                    struct sw_flow_actions **sfa,
1949                                    __be16 eth_type, __be16 vlan_tci, bool log)
1950{
1951        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1952        const struct nlattr *probability, *actions;
1953        const struct nlattr *a;
1954        int rem, start, err, st_acts;
1955
1956        memset(attrs, 0, sizeof(attrs));
1957        nla_for_each_nested(a, attr, rem) {
1958                int type = nla_type(a);
1959                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
1960                        return -EINVAL;
1961                attrs[type] = a;
1962        }
1963        if (rem)
1964                return -EINVAL;
1965
1966        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
1967        if (!probability || nla_len(probability) != sizeof(u32))
1968                return -EINVAL;
1969
1970        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
1971        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
1972                return -EINVAL;
1973
1974        /* validation done, copy sample action. */
1975        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
1976        if (start < 0)
1977                return start;
1978        err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1979                                 nla_data(probability), sizeof(u32), log);
1980        if (err)
1981                return err;
1982        st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
1983        if (st_acts < 0)
1984                return st_acts;
1985
1986        err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
1987                                     eth_type, vlan_tci, log);
1988        if (err)
1989                return err;
1990
1991        add_nested_action_end(*sfa, st_acts);
1992        add_nested_action_end(*sfa, start);
1993
1994        return 0;
1995}
1996
1997void ovs_match_init(struct sw_flow_match *match,
1998                    struct sw_flow_key *key,
1999                    bool reset_key,
2000                    struct sw_flow_mask *mask)
2001{
2002        memset(match, 0, sizeof(*match));
2003        match->key = key;
2004        match->mask = mask;
2005
2006        if (reset_key)
2007                memset(key, 0, sizeof(*key));
2008
2009        if (mask) {
2010                memset(&mask->key, 0, sizeof(mask->key));
2011                mask->range.start = mask->range.end = 0;
2012        }
2013}
2014
2015static int validate_geneve_opts(struct sw_flow_key *key)
2016{
2017        struct geneve_opt *option;
2018        int opts_len = key->tun_opts_len;
2019        bool crit_opt = false;
2020
2021        option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2022        while (opts_len > 0) {
2023                int len;
2024
2025                if (opts_len < sizeof(*option))
2026                        return -EINVAL;
2027
2028                len = sizeof(*option) + option->length * 4;
2029                if (len > opts_len)
2030                        return -EINVAL;
2031
2032                crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2033
2034                option = (struct geneve_opt *)((u8 *)option + len);
2035                opts_len -= len;
2036        }
2037
2038        key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2039
2040        return 0;
2041}
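/* Illustrative note (not part of the original source): in the loop above each
 * Geneve option occupies its fixed header plus option->length 4-byte words of
 * data, i.e. len = sizeof(*option) + option->length * 4; an option with
 * length == 2 therefore consumes sizeof(struct geneve_opt) + 8 bytes of
 * 'opts_len'.
 */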
2042
2043static int validate_and_copy_set_tun(const struct nlattr *attr,
2044                                     struct sw_flow_actions **sfa, bool log)
2045{
2046        struct sw_flow_match match;
2047        struct sw_flow_key key;
2048        struct metadata_dst *tun_dst;
2049        struct ip_tunnel_info *tun_info;
2050        struct ovs_tunnel_info *ovs_tun;
2051        struct nlattr *a;
2052        int err = 0, start, opts_type;
2053
2054        ovs_match_init(&match, &key, true, NULL);
2055        opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2056        if (opts_type < 0)
2057                return opts_type;
2058
2059        if (key.tun_opts_len) {
2060                switch (opts_type) {
2061                case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2062                        err = validate_geneve_opts(&key);
2063                        if (err < 0)
2064                                return err;
2065                        break;
2066                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2067                        break;
2068                }
2069        }
2070
2071        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2072        if (start < 0)
2073                return start;
2074
2075        tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL);
2076        if (!tun_dst)
2077                return -ENOMEM;
2078
2079        err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2080        if (err) {
2081                dst_release((struct dst_entry *)tun_dst);
2082                return err;
2083        }
2084
2085        a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2086                         sizeof(*ovs_tun), log);
2087        if (IS_ERR(a)) {
2088                dst_release((struct dst_entry *)tun_dst);
2089                return PTR_ERR(a);
2090        }
2091
2092        ovs_tun = nla_data(a);
2093        ovs_tun->tun_dst = tun_dst;
2094
2095        tun_info = &tun_dst->u.tun_info;
2096        tun_info->mode = IP_TUNNEL_INFO_TX;
2097        if (key.tun_proto == AF_INET6)
2098                tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2099        tun_info->key = key.tun_key;
2100
2101        /* We need to store the options in the action itself since
2102         * everything else will go away after flow setup. We can append
2103         * it to tun_info and then point there.
2104         */
2105        ip_tunnel_info_opts_set(tun_info,
2106                                TUN_METADATA_OPTS(&key, key.tun_opts_len),
2107                                key.tun_opts_len);
2108        add_nested_action_end(*sfa, start);
2109
2110        return err;
2111}
2112
2113/* Return false if there are any non-masked bits set.
2114 * Mask follows data immediately, before any netlink padding.
2115 */
2116static bool validate_masked(u8 *data, int len)
2117{
2118        u8 *mask = data + len;
2119
2120        while (len--)
2121                if (*data++ & ~*mask++)
2122                        return false;
2123
2124        return true;
2125}
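/* Illustrative note (not part of the original source): for a one-byte payload
 * laid out as value 0x12 followed by mask 0xf0, validate_masked() above
 * computes 0x12 & ~0xf0 == 0x02, finds a bit set outside the mask and returns
 * false; with a mask of 0xff the same value passes.
 */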
2126
2127static int validate_set(const struct nlattr *a,
2128                        const struct sw_flow_key *flow_key,
2129                        struct sw_flow_actions **sfa,
2130                        bool *skip_copy, __be16 eth_type, bool masked, bool log)
2131{
2132        const struct nlattr *ovs_key = nla_data(a);
2133        int key_type = nla_type(ovs_key);
2134        size_t key_len;
2135
2136        /* There can be only one key in an action */
2137        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2138                return -EINVAL;
2139
2140        key_len = nla_len(ovs_key);
2141        if (masked)
2142                key_len /= 2;
2143
2144        if (key_type > OVS_KEY_ATTR_MAX ||
2145            !check_attr_len(key_len, ovs_key_lens[key_type].len))
2146                return -EINVAL;
2147
2148        if (masked && !validate_masked(nla_data(ovs_key), key_len))
2149                return -EINVAL;
2150
2151        switch (key_type) {
2152        const struct ovs_key_ipv4 *ipv4_key;
2153        const struct ovs_key_ipv6 *ipv6_key;
2154        int err;
2155
2156        case OVS_KEY_ATTR_PRIORITY:
2157        case OVS_KEY_ATTR_SKB_MARK:
2158        case OVS_KEY_ATTR_CT_MARK:
2159        case OVS_KEY_ATTR_CT_LABELS:
2160        case OVS_KEY_ATTR_ETHERNET:
2161                break;
2162
2163        case OVS_KEY_ATTR_TUNNEL:
2164                if (masked)
2165                        return -EINVAL; /* Masked tunnel set not supported. */
2166
2167                *skip_copy = true;
2168                err = validate_and_copy_set_tun(a, sfa, log);
2169                if (err)
2170                        return err;
2171                break;
2172
2173        case OVS_KEY_ATTR_IPV4:
2174                if (eth_type != htons(ETH_P_IP))
2175                        return -EINVAL;
2176
2177                ipv4_key = nla_data(ovs_key);
2178
2179                if (masked) {
2180                        const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2181
2182                        /* Non-writeable fields. */
2183                        if (mask->ipv4_proto || mask->ipv4_frag)
2184                                return -EINVAL;
2185                } else {
2186                        if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2187                                return -EINVAL;
2188
2189                        if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2190                                return -EINVAL;
2191                }
2192                break;
2193
2194        case OVS_KEY_ATTR_IPV6:
2195                if (eth_type != htons(ETH_P_IPV6))
2196                        return -EINVAL;
2197
2198                ipv6_key = nla_data(ovs_key);
2199
2200                if (masked) {
2201                        const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2202
2203                        /* Non-writeable fields. */
2204                        if (mask->ipv6_proto || mask->ipv6_frag)
2205                                return -EINVAL;
2206
2207                        /* Invalid bits in the flow label mask? */
2208                        if (ntohl(mask->ipv6_label) & 0xFFF00000)
2209                                return -EINVAL;
2210                } else {
2211                        if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2212                                return -EINVAL;
2213
2214                        if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2215                                return -EINVAL;
2216                }
2217                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2218                        return -EINVAL;
2219
2220                break;
2221
2222        case OVS_KEY_ATTR_TCP:
2223                if ((eth_type != htons(ETH_P_IP) &&
2224                     eth_type != htons(ETH_P_IPV6)) ||
2225                    flow_key->ip.proto != IPPROTO_TCP)
2226                        return -EINVAL;
2227
2228                break;
2229
2230        case OVS_KEY_ATTR_UDP:
2231                if ((eth_type != htons(ETH_P_IP) &&
2232                     eth_type != htons(ETH_P_IPV6)) ||
2233                    flow_key->ip.proto != IPPROTO_UDP)
2234                        return -EINVAL;
2235
2236                break;
2237
2238        case OVS_KEY_ATTR_MPLS:
2239                if (!eth_p_mpls(eth_type))
2240                        return -EINVAL;
2241                break;
2242
2243        case OVS_KEY_ATTR_SCTP:
2244                if ((eth_type != htons(ETH_P_IP) &&
2245                     eth_type != htons(ETH_P_IPV6)) ||
2246                    flow_key->ip.proto != IPPROTO_SCTP)
2247                        return -EINVAL;
2248
2249                break;
2250
2251        default:
2252                return -EINVAL;
2253        }
2254
2255        /* Convert non-masked non-tunnel set actions to masked set actions. */
2256        if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2257                int start, len = key_len * 2;
2258                struct nlattr *at;
2259
2260                *skip_copy = true;
2261
2262                start = add_nested_action_start(sfa,
2263                                                OVS_ACTION_ATTR_SET_TO_MASKED,
2264                                                log);
2265                if (start < 0)
2266                        return start;
2267
2268                at = __add_action(sfa, key_type, NULL, len, log);
2269                if (IS_ERR(at))
2270                        return PTR_ERR(at);
2271
2272                memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2273                memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
2274                /* Clear non-writeable bits from otherwise writeable fields. */
2275                if (key_type == OVS_KEY_ATTR_IPV6) {
2276                        struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2277
2278                        mask->ipv6_label &= htonl(0x000FFFFF);
2279                }
2280                add_nested_action_end(*sfa, start);
2281        }
2282
2283        return 0;
2284}
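/* Illustrative note (not part of the original source): after the conversion
 * at the end of validate_set(), a plain OVS_ACTION_ATTR_SET of, say, a 4-byte
 * OVS_KEY_ATTR_SKB_MARK is stored as OVS_ACTION_ATTR_SET_TO_MASKED with a
 * payload of twice the length: the four key bytes followed by four 0xff mask
 * bytes.  This is the same value-then-mask layout userspace uses for
 * OVS_ACTION_ATTR_SET_MASKED, and masked_set_action_to_set_action_attr()
 * reverses it when the actions are dumped back to userspace.
 */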
2285
2286static int validate_userspace(const struct nlattr *attr)
2287{
2288        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2289                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2290                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2291                [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
2292        };
2293        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2294        int error;
2295
2296        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
2297                                 attr, userspace_policy);
2298        if (error)
2299                return error;
2300
2301        if (!a[OVS_USERSPACE_ATTR_PID] ||
2302            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2303                return -EINVAL;
2304
2305        return 0;
2306}
2307
2308static int copy_action(const struct nlattr *from,
2309                       struct sw_flow_actions **sfa, bool log)
2310{
2311        int totlen = NLA_ALIGN(from->nla_len);
2312        struct nlattr *to;
2313
2314        to = reserve_sfa_size(sfa, from->nla_len, log);
2315        if (IS_ERR(to))
2316                return PTR_ERR(to);
2317
2318        memcpy(to, from, totlen);
2319        return 0;
2320}
2321
2322static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2323                                  const struct sw_flow_key *key,
2324                                  int depth, struct sw_flow_actions **sfa,
2325                                  __be16 eth_type, __be16 vlan_tci, bool log)
2326{
2327        const struct nlattr *a;
2328        int rem, err;
2329
2330        if (depth >= SAMPLE_ACTION_DEPTH)
2331                return -EOVERFLOW;
2332
2333        nla_for_each_nested(a, attr, rem) {
2334                /* Expected argument lengths, (u32)-1 for variable length. */
2335                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2336                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2337                        [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2338                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2339                        [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2340                        [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2341                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2342                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
2343                        [OVS_ACTION_ATTR_SET] = (u32)-1,
2344                        [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2345                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2346                        [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2347                        [OVS_ACTION_ATTR_CT] = (u32)-1,
2348                        [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2349                };
2350                const struct ovs_action_push_vlan *vlan;
2351                int type = nla_type(a);
2352                bool skip_copy;
2353
2354                if (type > OVS_ACTION_ATTR_MAX ||
2355                    (action_lens[type] != nla_len(a) &&
2356                     action_lens[type] != (u32)-1))
2357                        return -EINVAL;
2358
2359                skip_copy = false;
2360                switch (type) {
2361                case OVS_ACTION_ATTR_UNSPEC:
2362                        return -EINVAL;
2363
2364                case OVS_ACTION_ATTR_USERSPACE:
2365                        err = validate_userspace(a);
2366                        if (err)
2367                                return err;
2368                        break;
2369
2370                case OVS_ACTION_ATTR_OUTPUT:
2371                        if (nla_get_u32(a) >= DP_MAX_PORTS)
2372                                return -EINVAL;
2373                        break;
2374
2375                case OVS_ACTION_ATTR_TRUNC: {
2376                        const struct ovs_action_trunc *trunc = nla_data(a);
2377
2378                        if (trunc->max_len < ETH_HLEN)
2379                                return -EINVAL;
2380                        break;
2381                }
2382
2383                case OVS_ACTION_ATTR_HASH: {
2384                        const struct ovs_action_hash *act_hash = nla_data(a);
2385
2386                        switch (act_hash->hash_alg) {
2387                        case OVS_HASH_ALG_L4:
2388                                break;
2389                        default:
2390                                return -EINVAL;
2391                        }
2392
2393                        break;
2394                }
2395
2396                case OVS_ACTION_ATTR_POP_VLAN:
2397                        vlan_tci = htons(0);
2398                        break;
2399
2400                case OVS_ACTION_ATTR_PUSH_VLAN:
2401                        vlan = nla_data(a);
2402                        if (!eth_type_vlan(vlan->vlan_tpid))
2403                                return -EINVAL;
2404                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
2405                                return -EINVAL;
2406                        vlan_tci = vlan->vlan_tci;
2407                        break;
2408
2409                case OVS_ACTION_ATTR_RECIRC:
2410                        break;
2411
2412                case OVS_ACTION_ATTR_PUSH_MPLS: {
2413                        const struct ovs_action_push_mpls *mpls = nla_data(a);
2414
2415                        if (!eth_p_mpls(mpls->mpls_ethertype))
2416                                return -EINVAL;
2417                        /* Prohibit push MPLS other than to a white list of
2418                         * ethertypes, for packets that have a known tag order.
2419                         */
2420                        if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2421                            (eth_type != htons(ETH_P_IP) &&
2422                             eth_type != htons(ETH_P_IPV6) &&
2423                             eth_type != htons(ETH_P_ARP) &&
2424                             eth_type != htons(ETH_P_RARP) &&
2425                             !eth_p_mpls(eth_type)))
2426                                return -EINVAL;
2427                        eth_type = mpls->mpls_ethertype;
2428                        break;
2429                }
2430
2431                case OVS_ACTION_ATTR_POP_MPLS:
2432                        if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2433                            !eth_p_mpls(eth_type))
2434                                return -EINVAL;
2435
2436                        /* Disallow subsequent L2.5+ set and mpls_pop actions
2437                         * as there is no check here to ensure that the new
2438                         * eth_type is valid and thus set actions could
2439                         * write off the end of the packet or otherwise
2440                         * corrupt it.
2441                         *
2442                         * Support for these actions is planned using packet
2443                         * recirculation.
2444                         */
2445                        eth_type = htons(0);
2446                        break;
2447
2448                case OVS_ACTION_ATTR_SET:
2449                        err = validate_set(a, key, sfa,
2450                                           &skip_copy, eth_type, false, log);
2451                        if (err)
2452                                return err;
2453                        break;
2454
2455                case OVS_ACTION_ATTR_SET_MASKED:
2456                        err = validate_set(a, key, sfa,
2457                                           &skip_copy, eth_type, true, log);
2458                        if (err)
2459                                return err;
2460                        break;
2461
2462                case OVS_ACTION_ATTR_SAMPLE:
2463                        err = validate_and_copy_sample(net, a, key, depth, sfa,
2464                                                       eth_type, vlan_tci, log);
2465                        if (err)
2466                                return err;
2467                        skip_copy = true;
2468                        break;
2469
2470                case OVS_ACTION_ATTR_CT:
2471                        err = ovs_ct_copy_action(net, a, key, sfa, log);
2472                        if (err)
2473                                return err;
2474                        skip_copy = true;
2475                        break;
2476
2477                default:
2478                        OVS_NLERR(log, "Unknown Action type %d", type);
2479                        return -EINVAL;
2480                }
2481                if (!skip_copy) {
2482                        err = copy_action(a, sfa, log);
2483                        if (err)
2484                                return err;
2485                }
2486        }
2487
2488        if (rem > 0)
2489                return -EINVAL;
2490
2491        return 0;
2492}
2493
2494/* 'key' must be the masked key. */
2495int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2496                         const struct sw_flow_key *key,
2497                         struct sw_flow_actions **sfa, bool log)
2498{
2499        int err;
2500
2501        *sfa = nla_alloc_flow_actions(nla_len(attr), log);
2502        if (IS_ERR(*sfa))
2503                return PTR_ERR(*sfa);
2504
2505        (*sfa)->orig_len = nla_len(attr);
2506        err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
2507                                     key->eth.vlan.tci, log);
2508        if (err)
2509                ovs_nla_free_flow_actions(*sfa);
2510
2511        return err;
2512}
2513
2514static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
2515{
2516        const struct nlattr *a;
2517        struct nlattr *start;
2518        int err = 0, rem;
2519
2520        start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
2521        if (!start)
2522                return -EMSGSIZE;
2523
2524        nla_for_each_nested(a, attr, rem) {
2525                int type = nla_type(a);
2526                struct nlattr *st_sample;
2527
2528                switch (type) {
2529                case OVS_SAMPLE_ATTR_PROBABILITY:
2530                        if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
2531                                    sizeof(u32), nla_data(a)))
2532                                return -EMSGSIZE;
2533                        break;
2534                case OVS_SAMPLE_ATTR_ACTIONS:
2535                        st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
2536                        if (!st_sample)
2537                                return -EMSGSIZE;
2538                        err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
2539                        if (err)
2540                                return err;
2541                        nla_nest_end(skb, st_sample);
2542                        break;
2543                }
2544        }
2545
2546        nla_nest_end(skb, start);
2547        return err;
2548}
2549
2550static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
2551{
2552        const struct nlattr *ovs_key = nla_data(a);
2553        int key_type = nla_type(ovs_key);
2554        struct nlattr *start;
2555        int err;
2556
2557        switch (key_type) {
2558        case OVS_KEY_ATTR_TUNNEL_INFO: {
2559                struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
2560                struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
2561
2562                start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
2563                if (!start)
2564                        return -EMSGSIZE;
2565
2566                err = ip_tun_to_nlattr(skb, &tun_info->key,
2567                                        ip_tunnel_info_opts(tun_info),
2568                                        tun_info->options_len,
2569                                        ip_tunnel_info_af(tun_info));
2570                if (err)
2571                        return err;
2572                nla_nest_end(skb, start);
2573                break;
2574        }
2575        default:
2576                if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
2577                        return -EMSGSIZE;
2578                break;
2579        }
2580
2581        return 0;
2582}
2583
2584static int masked_set_action_to_set_action_attr(const struct nlattr *a,
2585                                                struct sk_buff *skb)
2586{
2587        const struct nlattr *ovs_key = nla_data(a);
2588        struct nlattr *nla;
2589        size_t key_len = nla_len(ovs_key) / 2;
2590
2591        /* Revert the conversion we did from a non-masked set action to
2592         * masked set action.
2593         */
2594        nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
2595        if (!nla)
2596                return -EMSGSIZE;
2597
2598        if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
2599                return -EMSGSIZE;
2600
2601        nla_nest_end(skb, nla);
2602        return 0;
2603}
2604
2605int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
2606{
2607        const struct nlattr *a;
2608        int rem, err;
2609
2610        nla_for_each_attr(a, attr, len, rem) {
2611                int type = nla_type(a);
2612
2613                switch (type) {
2614                case OVS_ACTION_ATTR_SET:
2615                        err = set_action_to_attr(a, skb);
2616                        if (err)
2617                                return err;
2618                        break;
2619
2620                case OVS_ACTION_ATTR_SET_TO_MASKED:
2621                        err = masked_set_action_to_set_action_attr(a, skb);
2622                        if (err)
2623                                return err;
2624                        break;
2625
2626                case OVS_ACTION_ATTR_SAMPLE:
2627                        err = sample_action_to_attr(a, skb);
2628                        if (err)
2629                                return err;
2630                        break;
2631
2632                case OVS_ACTION_ATTR_CT:
2633                        err = ovs_ct_action_to_attr(nla_data(a), skb);
2634                        if (err)
2635                                return err;
2636                        break;
2637
2638                default:
2639                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
2640                                return -EMSGSIZE;
2641                        break;
2642                }
2643        }
2644
2645        return 0;
2646}
2647