linux/net/openvswitch/flow_netlink.c
   1/*
   2 * Copyright (c) 2007-2017 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include "flow.h"
  22#include "datapath.h"
  23#include <linux/uaccess.h>
  24#include <linux/netdevice.h>
  25#include <linux/etherdevice.h>
  26#include <linux/if_ether.h>
  27#include <linux/if_vlan.h>
  28#include <net/llc_pdu.h>
  29#include <linux/kernel.h>
  30#include <linux/jhash.h>
  31#include <linux/jiffies.h>
  32#include <linux/llc.h>
  33#include <linux/module.h>
  34#include <linux/in.h>
  35#include <linux/rcupdate.h>
  36#include <linux/if_arp.h>
  37#include <linux/ip.h>
  38#include <linux/ipv6.h>
  39#include <linux/sctp.h>
  40#include <linux/tcp.h>
  41#include <linux/udp.h>
  42#include <linux/icmp.h>
  43#include <linux/icmpv6.h>
  44#include <linux/rculist.h>
  45#include <net/geneve.h>
  46#include <net/ip.h>
  47#include <net/ipv6.h>
  48#include <net/ndisc.h>
  49#include <net/mpls.h>
  50#include <net/vxlan.h>
  51#include <net/tun_proto.h>
  52#include <net/erspan.h>
  53
  54#include "flow_netlink.h"
  55
  56struct ovs_len_tbl {
  57        int len;
  58        const struct ovs_len_tbl *next;
  59};
  60
  61#define OVS_ATTR_NESTED -1
  62#define OVS_ATTR_VARIABLE -2
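/* Negative "lengths" in ovs_len_tbl are sentinels rather than sizes:
 * OVS_ATTR_NESTED marks an attribute whose payload is a further set of
 * nested attributes described by the .next table, and OVS_ATTR_VARIABLE
 * marks a variable-length payload.  check_attr_len() below accepts any
 * actual length for either sentinel.
 */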
  63
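/* Return true if the nested action list contains any action that may
 * modify the flow key (set/masked set, push/pop, conntrack, sample and
 * so on).  Only OUTPUT, RECIRC, TRUNC and USERSPACE leave the key
 * untouched; unknown actions are conservatively treated as changing it.
 */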
  64static bool actions_may_change_flow(const struct nlattr *actions)
  65{
  66        struct nlattr *nla;
  67        int rem;
  68
  69        nla_for_each_nested(nla, actions, rem) {
  70                u16 action = nla_type(nla);
  71
  72                switch (action) {
  73                case OVS_ACTION_ATTR_OUTPUT:
  74                case OVS_ACTION_ATTR_RECIRC:
  75                case OVS_ACTION_ATTR_TRUNC:
  76                case OVS_ACTION_ATTR_USERSPACE:
  77                        break;
  78
  79                case OVS_ACTION_ATTR_CT:
  80                case OVS_ACTION_ATTR_CT_CLEAR:
  81                case OVS_ACTION_ATTR_HASH:
  82                case OVS_ACTION_ATTR_POP_ETH:
  83                case OVS_ACTION_ATTR_POP_MPLS:
  84                case OVS_ACTION_ATTR_POP_NSH:
  85                case OVS_ACTION_ATTR_POP_VLAN:
  86                case OVS_ACTION_ATTR_PUSH_ETH:
  87                case OVS_ACTION_ATTR_PUSH_MPLS:
  88                case OVS_ACTION_ATTR_PUSH_NSH:
  89                case OVS_ACTION_ATTR_PUSH_VLAN:
  90                case OVS_ACTION_ATTR_SAMPLE:
  91                case OVS_ACTION_ATTR_SET:
  92                case OVS_ACTION_ATTR_SET_MASKED:
  93                case OVS_ACTION_ATTR_METER:
  94                default:
  95                        return true;
  96                }
  97        }
  98        return false;
  99}
 100
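/* Widen the key (or mask) coverage range of @match so that it spans the
 * @size bytes at @offset, rounded out to sizeof(long) boundaries at both
 * ends (the flow code elsewhere compares keys a long at a time).
 */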
 101static void update_range(struct sw_flow_match *match,
 102                         size_t offset, size_t size, bool is_mask)
 103{
 104        struct sw_flow_key_range *range;
 105        size_t start = rounddown(offset, sizeof(long));
 106        size_t end = roundup(offset + size, sizeof(long));
 107
 108        if (!is_mask)
 109                range = &match->range;
 110        else
 111                range = &match->mask->range;
 112
 113        if (range->start == range->end) {
 114                range->start = start;
 115                range->end = end;
 116                return;
 117        }
 118
 119        if (range->start > start)
 120                range->start = start;
 121
 122        if (range->end < end)
 123                range->end = end;
 124}
 125
 126#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
 127        do { \
 128                update_range(match, offsetof(struct sw_flow_key, field),    \
 129                             sizeof((match)->key->field), is_mask);         \
 130                if (is_mask)                                                \
 131                        (match)->mask->key.field = value;                   \
 132                else                                                        \
 133                        (match)->key->field = value;                        \
 134        } while (0)
 135
 136#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
 137        do {                                                                \
 138                update_range(match, offset, len, is_mask);                  \
 139                if (is_mask)                                                \
 140                        memcpy((u8 *)&(match)->mask->key + offset, value_p, \
 141                               len);                                       \
 142                else                                                        \
 143                        memcpy((u8 *)(match)->key + offset, value_p, len);  \
 144        } while (0)
 145
 146#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
 147        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
 148                                  value_p, len, is_mask)
 149
 150#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
 151        do {                                                                \
 152                update_range(match, offsetof(struct sw_flow_key, field),    \
 153                             sizeof((match)->key->field), is_mask);         \
 154                if (is_mask)                                                \
 155                        memset((u8 *)&(match)->mask->key.field, value,      \
 156                               sizeof((match)->mask->key.field));           \
 157                else                                                        \
 158                        memset((u8 *)&(match)->key->field, value,           \
 159                               sizeof((match)->key->field));                \
 160        } while (0)
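/* Typical use of these helpers, as seen in metadata_from_nlattrs() below:
 *
 *     SW_FLOW_KEY_PUT(match, phy.priority,
 *                     nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
 *
 * With is_mask false this stores into match->key->phy.priority; with
 * is_mask true it stores into match->mask->key.phy.priority.  In both
 * cases update_range() first widens the corresponding key range.
 */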
 161
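/* Return true only if @key_attrs carries every attribute implied by the
 * parsed key (key_expected below) and @mask_attrs is a subset of the
 * masks permitted for that key (mask_allowed); otherwise log and fail.
 */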
 162static bool match_validate(const struct sw_flow_match *match,
 163                           u64 key_attrs, u64 mask_attrs, bool log)
 164{
 165        u64 key_expected = 0;
 166        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 167
  168        /* The following mask attributes are allowed only if they
  169         * pass the validation tests. */
 170        mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
 171                        | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
 172                        | (1 << OVS_KEY_ATTR_IPV6)
 173                        | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
 174                        | (1 << OVS_KEY_ATTR_TCP)
 175                        | (1 << OVS_KEY_ATTR_TCP_FLAGS)
 176                        | (1 << OVS_KEY_ATTR_UDP)
 177                        | (1 << OVS_KEY_ATTR_SCTP)
 178                        | (1 << OVS_KEY_ATTR_ICMP)
 179                        | (1 << OVS_KEY_ATTR_ICMPV6)
 180                        | (1 << OVS_KEY_ATTR_ARP)
 181                        | (1 << OVS_KEY_ATTR_ND)
 182                        | (1 << OVS_KEY_ATTR_MPLS)
 183                        | (1 << OVS_KEY_ATTR_NSH));
 184
 185        /* Always allowed mask fields. */
 186        mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
 187                       | (1 << OVS_KEY_ATTR_IN_PORT)
 188                       | (1 << OVS_KEY_ATTR_ETHERTYPE));
 189
 190        /* Check key attributes. */
 191        if (match->key->eth.type == htons(ETH_P_ARP)
 192                        || match->key->eth.type == htons(ETH_P_RARP)) {
 193                key_expected |= 1 << OVS_KEY_ATTR_ARP;
 194                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 195                        mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
 196        }
 197
 198        if (eth_p_mpls(match->key->eth.type)) {
 199                key_expected |= 1 << OVS_KEY_ATTR_MPLS;
 200                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 201                        mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
 202        }
 203
 204        if (match->key->eth.type == htons(ETH_P_IP)) {
 205                key_expected |= 1 << OVS_KEY_ATTR_IPV4;
 206                if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
 207                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
 208                        mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
 209                }
 210
 211                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 212                        if (match->key->ip.proto == IPPROTO_UDP) {
 213                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 214                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 215                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 216                        }
 217
 218                        if (match->key->ip.proto == IPPROTO_SCTP) {
 219                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 220                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 221                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 222                        }
 223
 224                        if (match->key->ip.proto == IPPROTO_TCP) {
 225                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 226                                key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 227                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
 228                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 229                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 230                                }
 231                        }
 232
 233                        if (match->key->ip.proto == IPPROTO_ICMP) {
 234                                key_expected |= 1 << OVS_KEY_ATTR_ICMP;
 235                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 236                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
 237                        }
 238                }
 239        }
 240
 241        if (match->key->eth.type == htons(ETH_P_IPV6)) {
 242                key_expected |= 1 << OVS_KEY_ATTR_IPV6;
 243                if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
 244                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
 245                        mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
 246                }
 247
 248                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 249                        if (match->key->ip.proto == IPPROTO_UDP) {
 250                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 251                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 252                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 253                        }
 254
 255                        if (match->key->ip.proto == IPPROTO_SCTP) {
 256                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 257                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 258                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 259                        }
 260
 261                        if (match->key->ip.proto == IPPROTO_TCP) {
 262                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 263                                key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 264                                if (match->mask && (match->mask->key.ip.proto == 0xff)) {
 265                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 266                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
 267                                }
 268                        }
 269
 270                        if (match->key->ip.proto == IPPROTO_ICMPV6) {
 271                                key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
 272                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 273                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
 274
 275                                if (match->key->tp.src ==
 276                                                htons(NDISC_NEIGHBOUR_SOLICITATION) ||
 277                                    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 278                                        key_expected |= 1 << OVS_KEY_ATTR_ND;
 279                                        /* Original direction conntrack tuple
 280                                         * uses the same space as the ND fields
 281                                         * in the key, so both are not allowed
 282                                         * at the same time.
 283                                         */
 284                                        mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
 285                                        if (match->mask && (match->mask->key.tp.src == htons(0xff)))
 286                                                mask_allowed |= 1 << OVS_KEY_ATTR_ND;
 287                                }
 288                        }
 289                }
 290        }
 291
 292        if (match->key->eth.type == htons(ETH_P_NSH)) {
 293                key_expected |= 1 << OVS_KEY_ATTR_NSH;
 294                if (match->mask &&
 295                    match->mask->key.eth.type == htons(0xffff)) {
 296                        mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
 297                }
 298        }
 299
 300        if ((key_attrs & key_expected) != key_expected) {
 301                /* Key attributes check failed. */
 302                OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
 303                          (unsigned long long)key_attrs,
 304                          (unsigned long long)key_expected);
 305                return false;
 306        }
 307
 308        if ((mask_attrs & mask_allowed) != mask_attrs) {
 309                /* Mask attributes check failed. */
 310                OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
 311                          (unsigned long long)mask_attrs,
 312                          (unsigned long long)mask_allowed);
 313                return false;
 314        }
 315
 316        return true;
 317}
 318
 319size_t ovs_tun_key_attr_size(void)
 320{
 321        /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
 322         * updating this function.
 323         */
 324        return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
 325                + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
 326                + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
 327                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
 328                + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
 329                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
 330                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
 331                + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
 332                + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
 333                /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
  334                 * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS are mutually exclusive with
 335                 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
 336                 */
 337                + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
 338                + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 339}
 340
 341static size_t ovs_nsh_key_attr_size(void)
 342{
 343        /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
 344         * updating this function.
 345         */
 346        return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
 347                /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
  348                 * mutually exclusive, so sizing for the larger one
  349                 * covers the smaller one as well.
 350                 */
 351                + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
 352}
 353
 354size_t ovs_key_attr_size(void)
 355{
 356        /* Whenever adding new OVS_KEY_ FIELDS, we should consider
 357         * updating this function.
 358         */
 359        BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
 360
 361        return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 362                + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
 363                  + ovs_tun_key_attr_size()
 364                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 365                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 366                + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
 367                + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
 368                + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
 369                + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
 370                + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
 371                + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
 372                + nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
 373                + nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
 374                  + ovs_nsh_key_attr_size()
 375                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
 376                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 377                + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
 378                + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
 379                + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 380                + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
 381                + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
 382                + nla_total_size(28); /* OVS_KEY_ATTR_ND */
 383}
 384
 385static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
 386        [OVS_VXLAN_EXT_GBP]         = { .len = sizeof(u32) },
 387};
 388
 389static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
 390        [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
 391        [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
 392        [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
 393        [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
 394        [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
 395        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
 396        [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
 397        [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
 398        [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
 399        [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
 400        [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
 401        [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
 402                                                .next = ovs_vxlan_ext_key_lens },
 403        [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 404        [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
 405        [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
 406};
 407
 408static const struct ovs_len_tbl
 409ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
 410        [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
 411        [OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
 412        [OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
 413};
 414
 415/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 416static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 417        [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
 418        [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
 419        [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
 420        [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
 421        [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
 422        [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
 423        [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
 424        [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
 425        [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
 426        [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
 427        [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
 428        [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
 429        [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
 430        [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
 431        [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
 432        [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
 433        [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
 434        [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
 435        [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
 436        [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
 437                                     .next = ovs_tunnel_key_lens, },
 438        [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
 439        [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
 440        [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
 441        [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
 442        [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
 443        [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
 444                .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
 445        [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
 446                .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
 447        [OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
 448                                     .next = ovs_nsh_key_attr_lens, },
 449};
 450
 451static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
 452{
 453        return expected_len == attr_len ||
 454               expected_len == OVS_ATTR_NESTED ||
 455               expected_len == OVS_ATTR_VARIABLE;
 456}
 457
 458static bool is_all_zero(const u8 *fp, size_t size)
 459{
 460        int i;
 461
 462        if (!fp)
 463                return false;
 464
 465        for (i = 0; i < size; i++)
 466                if (fp[i])
 467                        return false;
 468
 469        return true;
 470}
 471
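/* Walk the nested OVS_KEY_ATTR_* attributes in @attr, checking each type
 * and length against ovs_key_lens and rejecting duplicates, then record
 * the attribute in @a[] and set its bit in *@attrsp.  When @nz is true
 * (mask parsing) attributes whose payload is all zeroes are skipped,
 * since an all-zero mask wildcards the field anyway.
 */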
 472static int __parse_flow_nlattrs(const struct nlattr *attr,
 473                                const struct nlattr *a[],
 474                                u64 *attrsp, bool log, bool nz)
 475{
 476        const struct nlattr *nla;
 477        u64 attrs;
 478        int rem;
 479
 480        attrs = *attrsp;
 481        nla_for_each_nested(nla, attr, rem) {
 482                u16 type = nla_type(nla);
 483                int expected_len;
 484
 485                if (type > OVS_KEY_ATTR_MAX) {
 486                        OVS_NLERR(log, "Key type %d is out of range max %d",
 487                                  type, OVS_KEY_ATTR_MAX);
 488                        return -EINVAL;
 489                }
 490
 491                if (attrs & (1 << type)) {
 492                        OVS_NLERR(log, "Duplicate key (type %d).", type);
 493                        return -EINVAL;
 494                }
 495
 496                expected_len = ovs_key_lens[type].len;
 497                if (!check_attr_len(nla_len(nla), expected_len)) {
 498                        OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
 499                                  type, nla_len(nla), expected_len);
 500                        return -EINVAL;
 501                }
 502
 503                if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
 504                        attrs |= 1 << type;
 505                        a[type] = nla;
 506                }
 507        }
 508        if (rem) {
 509                OVS_NLERR(log, "Message has %d unknown bytes.", rem);
 510                return -EINVAL;
 511        }
 512
 513        *attrsp = attrs;
 514        return 0;
 515}
 516
 517static int parse_flow_mask_nlattrs(const struct nlattr *attr,
 518                                   const struct nlattr *a[], u64 *attrsp,
 519                                   bool log)
 520{
 521        return __parse_flow_nlattrs(attr, a, attrsp, log, true);
 522}
 523
 524int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
 525                       u64 *attrsp, bool log)
 526{
 527        return __parse_flow_nlattrs(attr, a, attrsp, log, false);
 528}
 529
 530static int genev_tun_opt_from_nlattr(const struct nlattr *a,
 531                                     struct sw_flow_match *match, bool is_mask,
 532                                     bool log)
 533{
 534        unsigned long opt_key_offset;
 535
 536        if (nla_len(a) > sizeof(match->key->tun_opts)) {
 537                OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
 538                          nla_len(a), sizeof(match->key->tun_opts));
 539                return -EINVAL;
 540        }
 541
 542        if (nla_len(a) % 4 != 0) {
 543                OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
 544                          nla_len(a));
 545                return -EINVAL;
 546        }
 547
 548        /* We need to record the length of the options passed
 549         * down, otherwise packets with the same format but
 550         * additional options will be silently matched.
 551         */
 552        if (!is_mask) {
 553                SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
 554                                false);
 555        } else {
 556                /* This is somewhat unusual because it looks at
 557                 * both the key and mask while parsing the
 558                 * attributes (and by extension assumes the key
 559                 * is parsed first). Normally, we would verify
 560                 * that each is the correct length and that the
 561                 * attributes line up in the validate function.
 562                 * However, that is difficult because this is
 563                 * variable length and we won't have the
 564                 * information later.
 565                 */
 566                if (match->key->tun_opts_len != nla_len(a)) {
 567                        OVS_NLERR(log, "Geneve option len %d != mask len %d",
 568                                  match->key->tun_opts_len, nla_len(a));
 569                        return -EINVAL;
 570                }
 571
 572                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 573        }
 574
 575        opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
 576        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
 577                                  nla_len(a), is_mask);
 578        return 0;
 579}
 580
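/* Parse OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS.  Only OVS_VXLAN_EXT_GBP is
 * currently understood; the result is stored as a struct vxlan_metadata
 * in the tun_opts area of the key (or mask), with tun_opts_len set to
 * match.
 */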
 581static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
 582                                     struct sw_flow_match *match, bool is_mask,
 583                                     bool log)
 584{
 585        struct nlattr *a;
 586        int rem;
 587        unsigned long opt_key_offset;
 588        struct vxlan_metadata opts;
 589
 590        BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
 591
 592        memset(&opts, 0, sizeof(opts));
 593        nla_for_each_nested(a, attr, rem) {
 594                int type = nla_type(a);
 595
 596                if (type > OVS_VXLAN_EXT_MAX) {
 597                        OVS_NLERR(log, "VXLAN extension %d out of range max %d",
 598                                  type, OVS_VXLAN_EXT_MAX);
 599                        return -EINVAL;
 600                }
 601
 602                if (!check_attr_len(nla_len(a),
 603                                    ovs_vxlan_ext_key_lens[type].len)) {
 604                        OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
 605                                  type, nla_len(a),
 606                                  ovs_vxlan_ext_key_lens[type].len);
 607                        return -EINVAL;
 608                }
 609
 610                switch (type) {
 611                case OVS_VXLAN_EXT_GBP:
 612                        opts.gbp = nla_get_u32(a);
 613                        break;
 614                default:
 615                        OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
 616                                  type);
 617                        return -EINVAL;
 618                }
 619        }
 620        if (rem) {
 621                OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
 622                          rem);
 623                return -EINVAL;
 624        }
 625
 626        if (!is_mask)
 627                SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
 628        else
 629                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 630
 631        opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
 632        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
 633                                  is_mask);
 634        return 0;
 635}
 636
 637static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
 638                                      struct sw_flow_match *match, bool is_mask,
 639                                      bool log)
 640{
 641        unsigned long opt_key_offset;
 642
 643        BUILD_BUG_ON(sizeof(struct erspan_metadata) >
 644                     sizeof(match->key->tun_opts));
 645
 646        if (nla_len(a) > sizeof(match->key->tun_opts)) {
 647                OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
 648                          nla_len(a), sizeof(match->key->tun_opts));
 649                return -EINVAL;
 650        }
 651
 652        if (!is_mask)
 653                SW_FLOW_KEY_PUT(match, tun_opts_len,
 654                                sizeof(struct erspan_metadata), false);
 655        else
 656                SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
 657
 658        opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
 659        SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
 660                                  nla_len(a), is_mask);
 661        return 0;
 662}
 663
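/* Parse the nested OVS_TUNNEL_KEY_ATTR_* attributes into the tunnel key
 * (or mask) of @match.  A non-mask key must supply a non-zero IPv4 or
 * IPv6 destination address and a TTL.  Returns a negative errno on
 * error; on success returns the OVS_TUNNEL_KEY_ATTR_*_OPTS type of the
 * metadata block that was present (Geneve, VXLAN or ERSPAN), or 0 if
 * there was none.
 */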
 664static int ip_tun_from_nlattr(const struct nlattr *attr,
 665                              struct sw_flow_match *match, bool is_mask,
 666                              bool log)
 667{
 668        bool ttl = false, ipv4 = false, ipv6 = false;
 669        __be16 tun_flags = 0;
 670        int opts_type = 0;
 671        struct nlattr *a;
 672        int rem;
 673
 674        nla_for_each_nested(a, attr, rem) {
 675                int type = nla_type(a);
 676                int err;
 677
 678                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
 679                        OVS_NLERR(log, "Tunnel attr %d out of range max %d",
 680                                  type, OVS_TUNNEL_KEY_ATTR_MAX);
 681                        return -EINVAL;
 682                }
 683
 684                if (!check_attr_len(nla_len(a),
 685                                    ovs_tunnel_key_lens[type].len)) {
 686                        OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
 687                                  type, nla_len(a), ovs_tunnel_key_lens[type].len);
 688                        return -EINVAL;
 689                }
 690
 691                switch (type) {
 692                case OVS_TUNNEL_KEY_ATTR_ID:
 693                        SW_FLOW_KEY_PUT(match, tun_key.tun_id,
 694                                        nla_get_be64(a), is_mask);
 695                        tun_flags |= TUNNEL_KEY;
 696                        break;
 697                case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
 698                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
 699                                        nla_get_in_addr(a), is_mask);
 700                        ipv4 = true;
 701                        break;
 702                case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
 703                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
 704                                        nla_get_in_addr(a), is_mask);
 705                        ipv4 = true;
 706                        break;
 707                case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
 708                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
 709                                        nla_get_in6_addr(a), is_mask);
 710                        ipv6 = true;
 711                        break;
 712                case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
 713                        SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
 714                                        nla_get_in6_addr(a), is_mask);
 715                        ipv6 = true;
 716                        break;
 717                case OVS_TUNNEL_KEY_ATTR_TOS:
 718                        SW_FLOW_KEY_PUT(match, tun_key.tos,
 719                                        nla_get_u8(a), is_mask);
 720                        break;
 721                case OVS_TUNNEL_KEY_ATTR_TTL:
 722                        SW_FLOW_KEY_PUT(match, tun_key.ttl,
 723                                        nla_get_u8(a), is_mask);
 724                        ttl = true;
 725                        break;
 726                case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
 727                        tun_flags |= TUNNEL_DONT_FRAGMENT;
 728                        break;
 729                case OVS_TUNNEL_KEY_ATTR_CSUM:
 730                        tun_flags |= TUNNEL_CSUM;
 731                        break;
 732                case OVS_TUNNEL_KEY_ATTR_TP_SRC:
 733                        SW_FLOW_KEY_PUT(match, tun_key.tp_src,
 734                                        nla_get_be16(a), is_mask);
 735                        break;
 736                case OVS_TUNNEL_KEY_ATTR_TP_DST:
 737                        SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
 738                                        nla_get_be16(a), is_mask);
 739                        break;
 740                case OVS_TUNNEL_KEY_ATTR_OAM:
 741                        tun_flags |= TUNNEL_OAM;
 742                        break;
 743                case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
 744                        if (opts_type) {
 745                                OVS_NLERR(log, "Multiple metadata blocks provided");
 746                                return -EINVAL;
 747                        }
 748
 749                        err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
 750                        if (err)
 751                                return err;
 752
 753                        tun_flags |= TUNNEL_GENEVE_OPT;
 754                        opts_type = type;
 755                        break;
 756                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
 757                        if (opts_type) {
 758                                OVS_NLERR(log, "Multiple metadata blocks provided");
 759                                return -EINVAL;
 760                        }
 761
 762                        err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
 763                        if (err)
 764                                return err;
 765
 766                        tun_flags |= TUNNEL_VXLAN_OPT;
 767                        opts_type = type;
 768                        break;
 769                case OVS_TUNNEL_KEY_ATTR_PAD:
 770                        break;
 771                case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
 772                        if (opts_type) {
 773                                OVS_NLERR(log, "Multiple metadata blocks provided");
 774                                return -EINVAL;
 775                        }
 776
 777                        err = erspan_tun_opt_from_nlattr(a, match, is_mask,
 778                                                         log);
 779                        if (err)
 780                                return err;
 781
 782                        tun_flags |= TUNNEL_ERSPAN_OPT;
 783                        opts_type = type;
 784                        break;
 785                default:
 786                        OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 787                                  type);
 788                        return -EINVAL;
 789                }
 790        }
 791
 792        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 793        if (is_mask)
 794                SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
 795        else
 796                SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
 797                                false);
 798
 799        if (rem > 0) {
 800                OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
 801                          rem);
 802                return -EINVAL;
 803        }
 804
 805        if (ipv4 && ipv6) {
 806                OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
 807                return -EINVAL;
 808        }
 809
 810        if (!is_mask) {
 811                if (!ipv4 && !ipv6) {
 812                        OVS_NLERR(log, "IP tunnel dst address not specified");
 813                        return -EINVAL;
 814                }
 815                if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
 816                        OVS_NLERR(log, "IPv4 tunnel dst address is zero");
 817                        return -EINVAL;
 818                }
 819                if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
 820                        OVS_NLERR(log, "IPv6 tunnel dst address is zero");
 821                        return -EINVAL;
 822                }
 823
 824                if (!ttl) {
 825                        OVS_NLERR(log, "IP tunnel TTL not specified.");
 826                        return -EINVAL;
 827                }
 828        }
 829
 830        return opts_type;
 831}
 832
 833static int vxlan_opt_to_nlattr(struct sk_buff *skb,
 834                               const void *tun_opts, int swkey_tun_opts_len)
 835{
 836        const struct vxlan_metadata *opts = tun_opts;
 837        struct nlattr *nla;
 838
 839        nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
 840        if (!nla)
 841                return -EMSGSIZE;
 842
 843        if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
 844                return -EMSGSIZE;
 845
 846        nla_nest_end(skb, nla);
 847        return 0;
 848}
 849
 850static int __ip_tun_to_nlattr(struct sk_buff *skb,
 851                              const struct ip_tunnel_key *output,
 852                              const void *tun_opts, int swkey_tun_opts_len,
 853                              unsigned short tun_proto)
 854{
 855        if (output->tun_flags & TUNNEL_KEY &&
 856            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
 857                         OVS_TUNNEL_KEY_ATTR_PAD))
 858                return -EMSGSIZE;
 859        switch (tun_proto) {
 860        case AF_INET:
 861                if (output->u.ipv4.src &&
 862                    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
 863                                    output->u.ipv4.src))
 864                        return -EMSGSIZE;
 865                if (output->u.ipv4.dst &&
 866                    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
 867                                    output->u.ipv4.dst))
 868                        return -EMSGSIZE;
 869                break;
 870        case AF_INET6:
 871                if (!ipv6_addr_any(&output->u.ipv6.src) &&
 872                    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
 873                                     &output->u.ipv6.src))
 874                        return -EMSGSIZE;
 875                if (!ipv6_addr_any(&output->u.ipv6.dst) &&
 876                    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
 877                                     &output->u.ipv6.dst))
 878                        return -EMSGSIZE;
 879                break;
 880        }
 881        if (output->tos &&
 882            nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
 883                return -EMSGSIZE;
 884        if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
 885                return -EMSGSIZE;
 886        if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
 887            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 888                return -EMSGSIZE;
 889        if ((output->tun_flags & TUNNEL_CSUM) &&
 890            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 891                return -EMSGSIZE;
 892        if (output->tp_src &&
 893            nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
 894                return -EMSGSIZE;
 895        if (output->tp_dst &&
 896            nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
 897                return -EMSGSIZE;
 898        if ((output->tun_flags & TUNNEL_OAM) &&
 899            nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 900                return -EMSGSIZE;
 901        if (swkey_tun_opts_len) {
 902                if (output->tun_flags & TUNNEL_GENEVE_OPT &&
 903                    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
 904                            swkey_tun_opts_len, tun_opts))
 905                        return -EMSGSIZE;
 906                else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
 907                         vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
 908                        return -EMSGSIZE;
 909                else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
 910                         nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
 911                                 swkey_tun_opts_len, tun_opts))
 912                        return -EMSGSIZE;
 913        }
 914
 915        return 0;
 916}
 917
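/* Like __ip_tun_to_nlattr(), but wraps the result in a nested
 * OVS_KEY_ATTR_TUNNEL attribute.
 */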
 918static int ip_tun_to_nlattr(struct sk_buff *skb,
 919                            const struct ip_tunnel_key *output,
 920                            const void *tun_opts, int swkey_tun_opts_len,
 921                            unsigned short tun_proto)
 922{
 923        struct nlattr *nla;
 924        int err;
 925
 926        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
 927        if (!nla)
 928                return -EMSGSIZE;
 929
 930        err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
 931                                 tun_proto);
 932        if (err)
 933                return err;
 934
 935        nla_nest_end(skb, nla);
 936        return 0;
 937}
 938
 939int ovs_nla_put_tunnel_info(struct sk_buff *skb,
 940                            struct ip_tunnel_info *tun_info)
 941{
 942        return __ip_tun_to_nlattr(skb, &tun_info->key,
 943                                  ip_tunnel_info_opts(tun_info),
 944                                  tun_info->options_len,
 945                                  ip_tunnel_info_af(tun_info));
 946}
 947
 948static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
 949                                    const struct nlattr *a[],
 950                                    bool is_mask, bool inner)
 951{
 952        __be16 tci = 0;
 953        __be16 tpid = 0;
 954
 955        if (a[OVS_KEY_ATTR_VLAN])
 956                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 957
 958        if (a[OVS_KEY_ATTR_ETHERTYPE])
 959                tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
 960
 961        if (likely(!inner)) {
 962                SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
 963                SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
 964        } else {
 965                SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
 966                SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
 967        }
 968        return 0;
 969}
 970
 971static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
 972                                      u64 key_attrs, bool inner,
 973                                      const struct nlattr **a, bool log)
 974{
 975        __be16 tci = 0;
 976
 977        if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
 978              (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
 979               eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
 980                /* Not a VLAN. */
 981                return 0;
 982        }
 983
 984        if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
 985              (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
 986                OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
 987                return -EINVAL;
 988        }
 989
 990        if (a[OVS_KEY_ATTR_VLAN])
 991                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
 992
 993        if (!(tci & htons(VLAN_TAG_PRESENT))) {
 994                if (tci) {
 995                        OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
 996                                  (inner) ? "C-VLAN" : "VLAN");
 997                        return -EINVAL;
 998                } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
 999                        /* Corner case for truncated VLAN header. */
1000                        OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
1001                                  (inner) ? "C-VLAN" : "VLAN");
1002                        return -EINVAL;
1003                }
1004        }
1005
1006        return 1;
1007}
1008
1009static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1010                                           u64 key_attrs, bool inner,
1011                                           const struct nlattr **a, bool log)
1012{
1013        __be16 tci = 0;
1014        __be16 tpid = 0;
1015        bool encap_valid = !!(match->key->eth.vlan.tci &
1016                              htons(VLAN_TAG_PRESENT));
1017        bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1018                                htons(VLAN_TAG_PRESENT));
1019
1020        if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1021                /* Not a VLAN. */
1022                return 0;
1023        }
1024
1025        if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1026                OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1027                          (inner) ? "C-VLAN" : "VLAN");
1028                return -EINVAL;
1029        }
1030
1031        if (a[OVS_KEY_ATTR_VLAN])
1032                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1033
1034        if (a[OVS_KEY_ATTR_ETHERTYPE])
1035                tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1036
1037        if (tpid != htons(0xffff)) {
1038                OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1039                          (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1040                return -EINVAL;
1041        }
1042        if (!(tci & htons(VLAN_TAG_PRESENT))) {
1043                OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
1044                          (inner) ? "C-VLAN" : "VLAN");
1045                return -EINVAL;
1046        }
1047
1048        return 1;
1049}
1050
1051static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1052                                     u64 *key_attrs, bool inner,
1053                                     const struct nlattr **a, bool is_mask,
1054                                     bool log)
1055{
1056        int err;
1057        const struct nlattr *encap;
1058
1059        if (!is_mask)
1060                err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1061                                                 a, log);
1062        else
1063                err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1064                                                      a, log);
1065        if (err <= 0)
1066                return err;
1067
1068        err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1069        if (err)
1070                return err;
1071
1072        *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1073        *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1074        *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1075
1076        encap = a[OVS_KEY_ATTR_ENCAP];
1077
1078        if (!is_mask)
1079                err = parse_flow_nlattrs(encap, a, key_attrs, log);
1080        else
1081                err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1082
1083        return err;
1084}
1085
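/* Parse the outer 802.1Q header from the VLAN/ETHERTYPE/ENCAP attributes
 * and, if the key's outer TCI has VLAN_TAG_PRESENT set, parse a possible
 * inner (QinQ) C-VLAN from the encapsulated attributes as well.  The
 * ENCAP contents replace the consumed attributes in @a[] and *@key_attrs
 * for the caller to continue with.
 */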
1086static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1087                                   u64 *key_attrs, const struct nlattr **a,
1088                                   bool is_mask, bool log)
1089{
1090        int err;
1091        bool encap_valid = false;
1092
1093        err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1094                                        is_mask, log);
1095        if (err)
1096                return err;
1097
1098        encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
1099        if (encap_valid) {
1100                err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1101                                                is_mask, log);
1102                if (err)
1103                        return err;
1104        }
1105
1106        return 0;
1107}
1108
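/* Copy OVS_KEY_ATTR_ETHERTYPE into the flow key.  Masks are always
 * forced to an exact match (0xffff); a non-mask EtherType below
 * ETH_P_802_3_MIN is rejected.
 */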
1109static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1110                                       u64 *attrs, const struct nlattr **a,
1111                                       bool is_mask, bool log)
1112{
1113        __be16 eth_type;
1114
1115        eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1116        if (is_mask) {
1117                /* Always exact match EtherType. */
1118                eth_type = htons(0xffff);
1119        } else if (!eth_proto_is_802_3(eth_type)) {
1120                OVS_NLERR(log, "EtherType %x is less than min %x",
1121                                ntohs(eth_type), ETH_P_802_3_MIN);
1122                return -EINVAL;
1123        }
1124
1125        SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1126        *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1127        return 0;
1128}
1129
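/* Consume the metadata attributes (dp_hash, recirc_id, priority,
 * in_port, skb_mark, tunnel and the conntrack fields) from @a[],
 * clearing the matching bits in *@attrs; anything left is for the
 * caller to handle.  Also derives mac_proto: an EtherType without an
 * Ethernet header means a layer 3 packet (MAC_PROTO_NONE), in which
 * case the EtherType itself is parsed here too.
 */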
1130static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1131                                 u64 *attrs, const struct nlattr **a,
1132                                 bool is_mask, bool log)
1133{
1134        u8 mac_proto = MAC_PROTO_ETHERNET;
1135
1136        if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
1137                u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
1138
1139                SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
1140                *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
1141        }
1142
1143        if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
1144                u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
1145
1146                SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
1147                *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
1148        }
1149
1150        if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1151                SW_FLOW_KEY_PUT(match, phy.priority,
1152                          nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1153                *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1154        }
1155
1156        if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1157                u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1158
1159                if (is_mask) {
1160                        in_port = 0xffffffff; /* Always exact match in_port. */
1161                } else if (in_port >= DP_MAX_PORTS) {
1162                        OVS_NLERR(log, "Port %d exceeds max allowable %d",
1163                                  in_port, DP_MAX_PORTS);
1164                        return -EINVAL;
1165                }
1166
1167                SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1168                *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1169        } else if (!is_mask) {
1170                SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1171        }
1172
1173        if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1174                uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1175
1176                SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1177                *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1178        }
1179        if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1180                if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1181                                       is_mask, log) < 0)
1182                        return -EINVAL;
1183                *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1184        }
1185
1186        if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1187            ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1188                u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1189
1190                if (ct_state & ~CT_SUPPORTED_MASK) {
1191                        OVS_NLERR(log, "ct_state flags %08x unsupported",
1192                                  ct_state);
1193                        return -EINVAL;
1194                }
1195
1196                SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
1197                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1198        }
1199        if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1200            ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1201                u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1202
1203                SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
1204                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1205        }
1206        if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1207            ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1208                u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1209
1210                SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1211                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1212        }
1213        if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1214            ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1215                const struct ovs_key_ct_labels *cl;
1216
1217                cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1218                SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1219                                   sizeof(*cl), is_mask);
1220                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1221        }
1222        if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
1223                const struct ovs_key_ct_tuple_ipv4 *ct;
1224
1225                ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
1226
1227                SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
1228                SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
1229                SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1230                SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1231                SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
1232                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
1233        }
1234        if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
1235                const struct ovs_key_ct_tuple_ipv6 *ct;
1236
1237                ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
1238
1239                SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
1240                                   sizeof(match->key->ipv6.ct_orig.src),
1241                                   is_mask);
1242                SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
1243                                   sizeof(match->key->ipv6.ct_orig.dst),
1244                                   is_mask);
1245                SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1246                SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1247                SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
1248                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
1249        }
1250
1251        /* For layer 3 packets the Ethernet type is provided
1252         * and treated as metadata but no MAC addresses are provided.
1253         */
1254        if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1255            (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1256                mac_proto = MAC_PROTO_NONE;
1257
1258        /* Always exact match mac_proto */
1259        SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1260
1261        if (mac_proto == MAC_PROTO_NONE)
1262                return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1263                                                   log);
1264
1265        return 0;
1266}
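/* Illustrative sketch: for a layer 3 (MAC_PROTO_NONE) flow the key omits
 * OVS_KEY_ATTR_ETHERNET and supplies the EtherType purely as metadata,
 * roughly:
 *
 *	OVS_KEY_ATTR_ETHERTYPE = htons(ETH_P_IP)	(no OVS_KEY_ATTR_ETHERNET)
 *
 * which makes metadata_from_nlattrs() select MAC_PROTO_NONE above and hand
 * the EtherType straight to parse_eth_type_from_nlattrs().
 */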
1267
1268int nsh_hdr_from_nlattr(const struct nlattr *attr,
1269                        struct nshhdr *nh, size_t size)
1270{
1271        struct nlattr *a;
1272        int rem;
1273        u8 flags = 0;
1274        u8 ttl = 0;
1275        int mdlen = 0;
1276
1277        /* validate_nsh() has already checked this, so there is no need to
1278         * duplicate the check here. */
1279        if (size < NSH_BASE_HDR_LEN)
1280                return -ENOBUFS;
1281
1282        nla_for_each_nested(a, attr, rem) {
1283                int type = nla_type(a);
1284
1285                switch (type) {
1286                case OVS_NSH_KEY_ATTR_BASE: {
1287                        const struct ovs_nsh_key_base *base = nla_data(a);
1288
1289                        flags = base->flags;
1290                        ttl = base->ttl;
1291                        nh->np = base->np;
1292                        nh->mdtype = base->mdtype;
1293                        nh->path_hdr = base->path_hdr;
1294                        break;
1295                }
1296                case OVS_NSH_KEY_ATTR_MD1:
1297                        mdlen = nla_len(a);
1298                        if (mdlen > size - NSH_BASE_HDR_LEN)
1299                                return -ENOBUFS;
1300                        memcpy(&nh->md1, nla_data(a), mdlen);
1301                        break;
1302
1303                case OVS_NSH_KEY_ATTR_MD2:
1304                        mdlen = nla_len(a);
1305                        if (mdlen > size - NSH_BASE_HDR_LEN)
1306                                return -ENOBUFS;
1307                        memcpy(&nh->md2, nla_data(a), mdlen);
1308                        break;
1309
1310                default:
1311                        return -EINVAL;
1312                }
1313        }
1314
1315        /* NSH header length = NSH_BASE_HDR_LEN + mdlen */
1316        nh->ver_flags_ttl_len = 0;
1317        nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1318
1319        return 0;
1320}
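/* Illustrative note: the resulting on-wire NSH header is NSH_BASE_HDR_LEN
 * bytes of base header followed by 'mdlen' bytes of MD type 1 or MD type 2
 * context data; nsh_set_flags_ttl_len() encodes that total length (together
 * with the flags and TTL) into ver_flags_ttl_len above.
 */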
1321
1322int nsh_key_from_nlattr(const struct nlattr *attr,
1323                        struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1324{
1325        struct nlattr *a;
1326        int rem;
1327
1328        /* validate_nsh() has already checked this, so there is no need to
1329         * duplicate the check here. */
1330        nla_for_each_nested(a, attr, rem) {
1331                int type = nla_type(a);
1332
1333                switch (type) {
1334                case OVS_NSH_KEY_ATTR_BASE: {
1335                        const struct ovs_nsh_key_base *base = nla_data(a);
1336                        const struct ovs_nsh_key_base *base_mask = base + 1;
1337
1338                        nsh->base = *base;
1339                        nsh_mask->base = *base_mask;
1340                        break;
1341                }
1342                case OVS_NSH_KEY_ATTR_MD1: {
1343                        const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1344                        const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1345
1346                        memcpy(nsh->context, md1->context, sizeof(*md1));
1347                        memcpy(nsh_mask->context, md1_mask->context,
1348                               sizeof(*md1_mask));
1349                        break;
1350                }
1351                case OVS_NSH_KEY_ATTR_MD2:
1352                        /* Not supported yet */
1353                        return -ENOTSUPP;
1354                default:
1355                        return -EINVAL;
1356                }
1357        }
1358
1359        return 0;
1360}
1361
1362static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1363                                   struct sw_flow_match *match, bool is_mask,
1364                                   bool is_push_nsh, bool log)
1365{
1366        struct nlattr *a;
1367        int rem;
1368        bool has_base = false;
1369        bool has_md1 = false;
1370        bool has_md2 = false;
1371        u8 mdtype = 0;
1372        int mdlen = 0;
1373
1374        if (WARN_ON(is_push_nsh && is_mask))
1375                return -EINVAL;
1376
1377        nla_for_each_nested(a, attr, rem) {
1378                int type = nla_type(a);
1379                int i;
1380
1381                if (type > OVS_NSH_KEY_ATTR_MAX) {
1382                        OVS_NLERR(log, "nsh attr %d is out of range max %d",
1383                                  type, OVS_NSH_KEY_ATTR_MAX);
1384                        return -EINVAL;
1385                }
1386
1387                if (!check_attr_len(nla_len(a),
1388                                    ovs_nsh_key_attr_lens[type].len)) {
1389                        OVS_NLERR(
1390                            log,
1391                            "nsh attr %d has unexpected len %d expected %d",
1392                            type,
1393                            nla_len(a),
1394                            ovs_nsh_key_attr_lens[type].len
1395                        );
1396                        return -EINVAL;
1397                }
1398
1399                switch (type) {
1400                case OVS_NSH_KEY_ATTR_BASE: {
1401                        const struct ovs_nsh_key_base *base = nla_data(a);
1402
1403                        has_base = true;
1404                        mdtype = base->mdtype;
1405                        SW_FLOW_KEY_PUT(match, nsh.base.flags,
1406                                        base->flags, is_mask);
1407                        SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1408                                        base->ttl, is_mask);
1409                        SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1410                                        base->mdtype, is_mask);
1411                        SW_FLOW_KEY_PUT(match, nsh.base.np,
1412                                        base->np, is_mask);
1413                        SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1414                                        base->path_hdr, is_mask);
1415                        break;
1416                }
1417                case OVS_NSH_KEY_ATTR_MD1: {
1418                        const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1419
1420                        has_md1 = true;
1421                        for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1422                                SW_FLOW_KEY_PUT(match, nsh.context[i],
1423                                                md1->context[i], is_mask);
1424                        break;
1425                }
1426                case OVS_NSH_KEY_ATTR_MD2:
1427                        if (!is_push_nsh) /* Not supported MD type 2 yet */
1428                                return -ENOTSUPP;
1429
1430                        has_md2 = true;
1431                        mdlen = nla_len(a);
1432                        if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1433                                OVS_NLERR(
1434                                    log,
1435                                    "Invalid MD length %d for MD type %d",
1436                                    mdlen,
1437                                    mdtype
1438                                );
1439                                return -EINVAL;
1440                        }
1441                        break;
1442                default:
1443                        OVS_NLERR(log, "Unknown nsh attribute %d",
1444                                  type);
1445                        return -EINVAL;
1446                }
1447        }
1448
1449        if (rem > 0) {
1450                OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1451                return -EINVAL;
1452        }
1453
1454        if (has_md1 && has_md2) {
1455                OVS_NLERR(
1456                    1,
1457                    "invalid nsh attribute: md1 and md2 are exclusive."
1458                );
1459                return -EINVAL;
1460        }
1461
1462        if (!is_mask) {
1463                if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1464                    (has_md2 && mdtype != NSH_M_TYPE2)) {
1465                        OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1466                                  mdtype);
1467                        return -EINVAL;
1468                }
1469
1470                if (is_push_nsh &&
1471                    (!has_base || (!has_md1 && !has_md2))) {
1472                        OVS_NLERR(
1473                            1,
1474                            "push_nsh: missing base or metadata attributes"
1475                        );
1476                        return -EINVAL;
1477                }
1478        }
1479
1480        return 0;
1481}
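/* Illustrative sketch: a minimal OVS_ACTION_ATTR_PUSH_NSH payload that
 * passes the checks above nests one base attribute plus exactly one
 * metadata attribute agreeing with base->mdtype, e.g.:
 *
 *	OVS_NSH_KEY_ATTR_BASE : struct ovs_nsh_key_base { .mdtype = NSH_M_TYPE1, ... }
 *	OVS_NSH_KEY_ATTR_MD1  : struct ovs_nsh_key_md1  { .context = { ... } }
 *
 * MD1 and MD2 are mutually exclusive, and MD2 is only accepted when pushing
 * a header (is_push_nsh); it cannot be matched on yet.
 */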
1482
1483static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1484                                u64 attrs, const struct nlattr **a,
1485                                bool is_mask, bool log)
1486{
1487        int err;
1488
1489        err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
1490        if (err)
1491                return err;
1492
1493        if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1494                const struct ovs_key_ethernet *eth_key;
1495
1496                eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1497                SW_FLOW_KEY_MEMCPY(match, eth.src,
1498                                eth_key->eth_src, ETH_ALEN, is_mask);
1499                SW_FLOW_KEY_MEMCPY(match, eth.dst,
1500                                eth_key->eth_dst, ETH_ALEN, is_mask);
1501                attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1502
1503                if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1504                        /* VLAN attribute is always parsed before getting here since it
1505                         * may occur multiple times.
1506                         */
1507                        OVS_NLERR(log, "VLAN attribute unexpected.");
1508                        return -EINVAL;
1509                }
1510
1511                if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1512                        err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1513                                                          log);
1514                        if (err)
1515                                return err;
1516                } else if (!is_mask) {
1517                        SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1518                }
1519        } else if (!match->key->eth.type) {
1520                OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1521                return -EINVAL;
1522        }
1523
1524        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1525                const struct ovs_key_ipv4 *ipv4_key;
1526
1527                ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1528                if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1529                        OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
1530                                  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1531                        return -EINVAL;
1532                }
1533                SW_FLOW_KEY_PUT(match, ip.proto,
1534                                ipv4_key->ipv4_proto, is_mask);
1535                SW_FLOW_KEY_PUT(match, ip.tos,
1536                                ipv4_key->ipv4_tos, is_mask);
1537                SW_FLOW_KEY_PUT(match, ip.ttl,
1538                                ipv4_key->ipv4_ttl, is_mask);
1539                SW_FLOW_KEY_PUT(match, ip.frag,
1540                                ipv4_key->ipv4_frag, is_mask);
1541                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1542                                ipv4_key->ipv4_src, is_mask);
1543                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1544                                ipv4_key->ipv4_dst, is_mask);
1545                attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1546        }
1547
1548        if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1549                const struct ovs_key_ipv6 *ipv6_key;
1550
1551                ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1552                if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1553                        OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
1554                                  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1555                        return -EINVAL;
1556                }
1557
1558                if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
1559                        OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
1560                                  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
1561                        return -EINVAL;
1562                }
1563
1564                SW_FLOW_KEY_PUT(match, ipv6.label,
1565                                ipv6_key->ipv6_label, is_mask);
1566                SW_FLOW_KEY_PUT(match, ip.proto,
1567                                ipv6_key->ipv6_proto, is_mask);
1568                SW_FLOW_KEY_PUT(match, ip.tos,
1569                                ipv6_key->ipv6_tclass, is_mask);
1570                SW_FLOW_KEY_PUT(match, ip.ttl,
1571                                ipv6_key->ipv6_hlimit, is_mask);
1572                SW_FLOW_KEY_PUT(match, ip.frag,
1573                                ipv6_key->ipv6_frag, is_mask);
1574                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1575                                ipv6_key->ipv6_src,
1576                                sizeof(match->key->ipv6.addr.src),
1577                                is_mask);
1578                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1579                                ipv6_key->ipv6_dst,
1580                                sizeof(match->key->ipv6.addr.dst),
1581                                is_mask);
1582
1583                attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1584        }
1585
1586        if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1587                const struct ovs_key_arp *arp_key;
1588
1589                arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1590                if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1591                        OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
1592                                  arp_key->arp_op);
1593                        return -EINVAL;
1594                }
1595
1596                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1597                                arp_key->arp_sip, is_mask);
1598                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1599                        arp_key->arp_tip, is_mask);
1600                SW_FLOW_KEY_PUT(match, ip.proto,
1601                                ntohs(arp_key->arp_op), is_mask);
1602                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1603                                arp_key->arp_sha, ETH_ALEN, is_mask);
1604                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1605                                arp_key->arp_tha, ETH_ALEN, is_mask);
1606
1607                attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1608        }
1609
1610        if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1611                if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1612                                            is_mask, false, log) < 0)
1613                        return -EINVAL;
1614                attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1615        }
1616
1617        if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1618                const struct ovs_key_mpls *mpls_key;
1619
1620                mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
1621                SW_FLOW_KEY_PUT(match, mpls.top_lse,
1622                                mpls_key->mpls_lse, is_mask);
1623
1624                attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
1625         }
1626
1627        if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1628                const struct ovs_key_tcp *tcp_key;
1629
1630                tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1631                SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
1632                SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
1633                attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1634        }
1635
1636        if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
1637                SW_FLOW_KEY_PUT(match, tp.flags,
1638                                nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
1639                                is_mask);
1640                attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
1641        }
1642
1643        if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1644                const struct ovs_key_udp *udp_key;
1645
1646                udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1647                SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
1648                SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
1649                attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1650        }
1651
1652        if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1653                const struct ovs_key_sctp *sctp_key;
1654
1655                sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1656                SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
1657                SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
1658                attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1659        }
1660
1661        if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1662                const struct ovs_key_icmp *icmp_key;
1663
1664                icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1665                SW_FLOW_KEY_PUT(match, tp.src,
1666                                htons(icmp_key->icmp_type), is_mask);
1667                SW_FLOW_KEY_PUT(match, tp.dst,
1668                                htons(icmp_key->icmp_code), is_mask);
1669                attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1670        }
1671
1672        if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1673                const struct ovs_key_icmpv6 *icmpv6_key;
1674
1675                icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1676                SW_FLOW_KEY_PUT(match, tp.src,
1677                                htons(icmpv6_key->icmpv6_type), is_mask);
1678                SW_FLOW_KEY_PUT(match, tp.dst,
1679                                htons(icmpv6_key->icmpv6_code), is_mask);
1680                attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1681        }
1682
1683        if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1684                const struct ovs_key_nd *nd_key;
1685
1686                nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1687                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1688                        nd_key->nd_target,
1689                        sizeof(match->key->ipv6.nd.target),
1690                        is_mask);
1691                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1692                        nd_key->nd_sll, ETH_ALEN, is_mask);
1693                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1694                                nd_key->nd_tll, ETH_ALEN, is_mask);
1695                attrs &= ~(1 << OVS_KEY_ATTR_ND);
1696        }
1697
1698        if (attrs != 0) {
1699                OVS_NLERR(log, "Unknown key attributes %llx",
1700                          (unsigned long long)attrs);
1701                return -EINVAL;
1702        }
1703
1704        return 0;
1705}
1706
1707static void nlattr_set(struct nlattr *attr, u8 val,
1708                       const struct ovs_len_tbl *tbl)
1709{
1710        struct nlattr *nla;
1711        int rem;
1712
1713        /* The nlattr stream should already have been validated */
1714        nla_for_each_nested(nla, attr, rem) {
1715                if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
1716                        nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
1717                else
1718                        memset(nla_data(nla), val, nla_len(nla));
1719
1720                if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1721                        *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1722        }
1723}
1724
1725static void mask_set_nlattr(struct nlattr *attr, u8 val)
1726{
1727        nlattr_set(attr, val, ovs_key_lens);
1728}
1729
1730/**
1731 * ovs_nla_get_match - parses Netlink attributes into a flow key and
1732 * mask. If 'nla_mask' is NULL, the flow is treated as an exact-match
1733 * flow. Otherwise, it is treated as a wildcarded flow, even if the mask
1734 * does not include any don't-care bits.
1735 * @net: Used to determine per-namespace field support.
1736 * @match: receives the extracted flow match information.
1737 * @nla_key: Netlink attribute holding a nested %OVS_KEY_ATTR_* attribute
1738 * sequence. The fields should be those of the packet that triggered the
1739 * creation of this flow.
1740 * @nla_mask: Optional. Netlink attribute holding a nested %OVS_KEY_ATTR_*
1741 * attribute sequence that specifies the mask field of the wildcarded flow.
1742 * @log: Boolean to allow kernel error logging.  Normally true, but when
1743 * probing for feature compatibility this should be passed in as false to
1744 * suppress unnecessary error logging.
1745 */
1746int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1747                      const struct nlattr *nla_key,
1748                      const struct nlattr *nla_mask,
1749                      bool log)
1750{
1751        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1752        struct nlattr *newmask = NULL;
1753        u64 key_attrs = 0;
1754        u64 mask_attrs = 0;
1755        int err;
1756
1757        err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1758        if (err)
1759                return err;
1760
1761        err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1762        if (err)
1763                return err;
1764
1765        err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1766        if (err)
1767                return err;
1768
1769        if (match->mask) {
1770                if (!nla_mask) {
1771                        /* Create an exact match mask. We need to set to 0xff
1772                         * all the 'match->mask' fields that have been touched
1773                         * in 'match->key'. We cannot simply memset
1774                         * 'match->mask', because padding bytes and fields not
1775                         * specified in 'match->key' should be left to 0.
1776                         * Instead, we use a stream of netlink attributes,
1777                         * copied from 'key' and set to 0xff.
1778                         * ovs_key_from_nlattrs() will take care of filling
1779                         * 'match->mask' appropriately.
1780                         */
1781                        newmask = kmemdup(nla_key,
1782                                          nla_total_size(nla_len(nla_key)),
1783                                          GFP_KERNEL);
1784                        if (!newmask)
1785                                return -ENOMEM;
1786
1787                        mask_set_nlattr(newmask, 0xff);
1788
1789                        /* Userspace does not send tunnel attributes that
1790                         * are 0, but we should nonetheless not wildcard them.
1791                         */
1792                        if (match->key->tun_proto)
1793                                SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1794                                                         0xff, true);
1795
1796                        nla_mask = newmask;
1797                }
1798
1799                err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1800                if (err)
1801                        goto free_newmask;
1802
1803                /* Always match on tci. */
1804                SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1805                SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1806
1807                err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1808                if (err)
1809                        goto free_newmask;
1810
1811                err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1812                                           log);
1813                if (err)
1814                        goto free_newmask;
1815        }
1816
1817        if (!match_validate(match, key_attrs, mask_attrs, log))
1818                err = -EINVAL;
1819
1820free_newmask:
1821        kfree(newmask);
1822        return err;
1823}
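/* Illustrative sketch of a typical caller (datapath flow setup, roughly):
 *
 *	struct sw_flow_match match;
 *	struct sw_flow_key key;
 *	struct sw_flow_mask mask;
 *
 *	ovs_match_init(&match, &key, true, &mask);
 *	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
 *				  a[OVS_FLOW_ATTR_MASK], log);
 *
 * When a[OVS_FLOW_ATTR_MASK] is NULL, the kmemdup()+0xff path above turns
 * every field touched in 'key' into an exact match, while untouched fields
 * remain wildcarded.
 */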
1824
1825static size_t get_ufid_len(const struct nlattr *attr, bool log)
1826{
1827        size_t len;
1828
1829        if (!attr)
1830                return 0;
1831
1832        len = nla_len(attr);
1833        if (len < 1 || len > MAX_UFID_LENGTH) {
1834                OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1835                          nla_len(attr), MAX_UFID_LENGTH);
1836                return 0;
1837        }
1838
1839        return len;
1840}
1841
1842/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
1843 * or false otherwise.
1844 */
1845bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1846                      bool log)
1847{
1848        sfid->ufid_len = get_ufid_len(attr, log);
1849        if (sfid->ufid_len)
1850                memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1851
1852        return sfid->ufid_len;
1853}
1854
1855int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1856                           const struct sw_flow_key *key, bool log)
1857{
1858        struct sw_flow_key *new_key;
1859
1860        if (ovs_nla_get_ufid(sfid, ufid, log))
1861                return 0;
1862
1863        /* If UFID was not provided, use unmasked key. */
1864        new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1865        if (!new_key)
1866                return -ENOMEM;
1867        memcpy(new_key, key, sizeof(*key));
1868        sfid->unmasked_key = new_key;
1869
1870        return 0;
1871}
1872
1873u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1874{
1875        return attr ? nla_get_u32(attr) : 0;
1876}
1877
1878/**
1879 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1880 * @net: Network namespace.
1881 * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1882 * metadata.
1883 * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1884 * attributes.
1885 * @attrs: Bit mask for the netlink attributes included in @a.
1886 * @log: Boolean to allow kernel error logging.  Normally true, but when
1887 * probing for feature compatibility this should be passed in as false to
1888 * suppress unnecessary error logging.
1889 *
1890 * This parses a series of Netlink attributes that form a flow key, which must
1891 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1892 * get the metadata, that is, the parts of the flow key that cannot be
1893 * extracted from the packet itself.
1894 *
1895 * This must be called before the packet key fields are filled in 'key'.
1896 */
1897
1898int ovs_nla_get_flow_metadata(struct net *net,
1899                              const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1900                              u64 attrs, struct sw_flow_key *key, bool log)
1901{
1902        struct sw_flow_match match;
1903
1904        memset(&match, 0, sizeof(match));
1905        match.key = key;
1906
1907        key->ct_state = 0;
1908        key->ct_zone = 0;
1909        key->ct_orig_proto = 0;
1910        memset(&key->ct, 0, sizeof(key->ct));
1911        memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1912        memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1913
1914        key->phy.in_port = DP_MAX_PORTS;
1915
1916        return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1917}
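/* Illustrative sketch: the intended calling order is attribute parsing,
 * then metadata, then packet extraction, roughly:
 *
 *	u64 attrs = 0;
 *	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
 *
 *	err = parse_flow_nlattrs(key_attr, a, &attrs, log);
 *	if (!err)
 *		err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
 *
 * after which the packet-derived key fields are filled in by the extraction
 * code in flow.c.
 */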
1918
1919static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1920                            bool is_mask)
1921{
1922        __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1923
1924        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1925            nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1926                return -EMSGSIZE;
1927        return 0;
1928}
1929
1930static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1931                             struct sk_buff *skb)
1932{
1933        struct nlattr *start;
1934
1935        start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1936        if (!start)
1937                return -EMSGSIZE;
1938
1939        if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1940                goto nla_put_failure;
1941
1942        if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1943                if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1944                            sizeof(nsh->context), nsh->context))
1945                        goto nla_put_failure;
1946        }
1947
1948        /* Don't support MD type 2 yet */
1949
1950        nla_nest_end(skb, start);
1951
1952        return 0;
1953
1954nla_put_failure:
1955        return -EMSGSIZE;
1956}
1957
1958static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1959                             const struct sw_flow_key *output, bool is_mask,
1960                             struct sk_buff *skb)
1961{
1962        struct ovs_key_ethernet *eth_key;
1963        struct nlattr *nla;
1964        struct nlattr *encap = NULL;
1965        struct nlattr *in_encap = NULL;
1966
1967        if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1968                goto nla_put_failure;
1969
1970        if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1971                goto nla_put_failure;
1972
1973        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1974                goto nla_put_failure;
1975
1976        if ((swkey->tun_proto || is_mask)) {
1977                const void *opts = NULL;
1978
1979                if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1980                        opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1981
1982                if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1983                                     swkey->tun_opts_len, swkey->tun_proto))
1984                        goto nla_put_failure;
1985        }
1986
1987        if (swkey->phy.in_port == DP_MAX_PORTS) {
1988                if (is_mask && (output->phy.in_port == 0xffff))
1989                        if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1990                                goto nla_put_failure;
1991        } else {
1992                u16 upper_u16;
1993                upper_u16 = !is_mask ? 0 : 0xffff;
1994
1995                if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1996                                (upper_u16 << 16) | output->phy.in_port))
1997                        goto nla_put_failure;
1998        }
1999
2000        if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2001                goto nla_put_failure;
2002
2003        if (ovs_ct_put_key(swkey, output, skb))
2004                goto nla_put_failure;
2005
2006        if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2007                nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2008                if (!nla)
2009                        goto nla_put_failure;
2010
2011                eth_key = nla_data(nla);
2012                ether_addr_copy(eth_key->eth_src, output->eth.src);
2013                ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2014
2015                if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2016                        if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2017                                goto nla_put_failure;
2018                        encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2019                        if (!swkey->eth.vlan.tci)
2020                                goto unencap;
2021
2022                        if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2023                                if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2024                                        goto nla_put_failure;
2025                                in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2026                                if (!swkey->eth.cvlan.tci)
2027                                        goto unencap;
2028                        }
2029                }
2030
2031                if (swkey->eth.type == htons(ETH_P_802_2)) {
2032                        /*
2033                         * Ethertype 802.2 is represented in netlink by omitting
2034                         * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute and
2035                         * putting 0xffff in the mask attribute.  The Ethertype
2036                         * can also be wildcarded.
2037                         */
2038                        if (is_mask && output->eth.type)
2039                                if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2040                                                        output->eth.type))
2041                                        goto nla_put_failure;
2042                        goto unencap;
2043                }
2044        }
2045
2046        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2047                goto nla_put_failure;
2048
2049        if (eth_type_vlan(swkey->eth.type)) {
2050                /* There are 3 VLAN tags; we don't know anything about the
2051                 * rest of the packet, so truncate here.
2052                 */
2053                WARN_ON_ONCE(!(encap && in_encap));
2054                goto unencap;
2055        }
2056
2057        if (swkey->eth.type == htons(ETH_P_IP)) {
2058                struct ovs_key_ipv4 *ipv4_key;
2059
2060                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2061                if (!nla)
2062                        goto nla_put_failure;
2063                ipv4_key = nla_data(nla);
2064                ipv4_key->ipv4_src = output->ipv4.addr.src;
2065                ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2066                ipv4_key->ipv4_proto = output->ip.proto;
2067                ipv4_key->ipv4_tos = output->ip.tos;
2068                ipv4_key->ipv4_ttl = output->ip.ttl;
2069                ipv4_key->ipv4_frag = output->ip.frag;
2070        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2071                struct ovs_key_ipv6 *ipv6_key;
2072
2073                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2074                if (!nla)
2075                        goto nla_put_failure;
2076                ipv6_key = nla_data(nla);
2077                memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2078                                sizeof(ipv6_key->ipv6_src));
2079                memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2080                                sizeof(ipv6_key->ipv6_dst));
2081                ipv6_key->ipv6_label = output->ipv6.label;
2082                ipv6_key->ipv6_proto = output->ip.proto;
2083                ipv6_key->ipv6_tclass = output->ip.tos;
2084                ipv6_key->ipv6_hlimit = output->ip.ttl;
2085                ipv6_key->ipv6_frag = output->ip.frag;
2086        } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2087                if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2088                        goto nla_put_failure;
2089        } else if (swkey->eth.type == htons(ETH_P_ARP) ||
2090                   swkey->eth.type == htons(ETH_P_RARP)) {
2091                struct ovs_key_arp *arp_key;
2092
2093                nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2094                if (!nla)
2095                        goto nla_put_failure;
2096                arp_key = nla_data(nla);
2097                memset(arp_key, 0, sizeof(struct ovs_key_arp));
2098                arp_key->arp_sip = output->ipv4.addr.src;
2099                arp_key->arp_tip = output->ipv4.addr.dst;
2100                arp_key->arp_op = htons(output->ip.proto);
2101                ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2102                ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2103        } else if (eth_p_mpls(swkey->eth.type)) {
2104                struct ovs_key_mpls *mpls_key;
2105
2106                nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
2107                if (!nla)
2108                        goto nla_put_failure;
2109                mpls_key = nla_data(nla);
2110                mpls_key->mpls_lse = output->mpls.top_lse;
2111        }
2112
2113        if ((swkey->eth.type == htons(ETH_P_IP) ||
2114             swkey->eth.type == htons(ETH_P_IPV6)) &&
2115             swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2116
2117                if (swkey->ip.proto == IPPROTO_TCP) {
2118                        struct ovs_key_tcp *tcp_key;
2119
2120                        nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2121                        if (!nla)
2122                                goto nla_put_failure;
2123                        tcp_key = nla_data(nla);
2124                        tcp_key->tcp_src = output->tp.src;
2125                        tcp_key->tcp_dst = output->tp.dst;
2126                        if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2127                                         output->tp.flags))
2128                                goto nla_put_failure;
2129                } else if (swkey->ip.proto == IPPROTO_UDP) {
2130                        struct ovs_key_udp *udp_key;
2131
2132                        nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2133                        if (!nla)
2134                                goto nla_put_failure;
2135                        udp_key = nla_data(nla);
2136                        udp_key->udp_src = output->tp.src;
2137                        udp_key->udp_dst = output->tp.dst;
2138                } else if (swkey->ip.proto == IPPROTO_SCTP) {
2139                        struct ovs_key_sctp *sctp_key;
2140
2141                        nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2142                        if (!nla)
2143                                goto nla_put_failure;
2144                        sctp_key = nla_data(nla);
2145                        sctp_key->sctp_src = output->tp.src;
2146                        sctp_key->sctp_dst = output->tp.dst;
2147                } else if (swkey->eth.type == htons(ETH_P_IP) &&
2148                           swkey->ip.proto == IPPROTO_ICMP) {
2149                        struct ovs_key_icmp *icmp_key;
2150
2151                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2152                        if (!nla)
2153                                goto nla_put_failure;
2154                        icmp_key = nla_data(nla);
2155                        icmp_key->icmp_type = ntohs(output->tp.src);
2156                        icmp_key->icmp_code = ntohs(output->tp.dst);
2157                } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2158                           swkey->ip.proto == IPPROTO_ICMPV6) {
2159                        struct ovs_key_icmpv6 *icmpv6_key;
2160
2161                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2162                                                sizeof(*icmpv6_key));
2163                        if (!nla)
2164                                goto nla_put_failure;
2165                        icmpv6_key = nla_data(nla);
2166                        icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2167                        icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2168
2169                        if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
2170                            icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
2171                                struct ovs_key_nd *nd_key;
2172
2173                                nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2174                                if (!nla)
2175                                        goto nla_put_failure;
2176                                nd_key = nla_data(nla);
2177                                memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2178                                                        sizeof(nd_key->nd_target));
2179                                ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2180                                ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2181                        }
2182                }
2183        }
2184
2185unencap:
2186        if (in_encap)
2187                nla_nest_end(skb, in_encap);
2188        if (encap)
2189                nla_nest_end(skb, encap);
2190
2191        return 0;
2192
2193nla_put_failure:
2194        return -EMSGSIZE;
2195}
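/* Illustrative note: for a double-tagged (QinQ) Ethernet key the
 * VLAN-related attributes emitted above nest as:
 *
 *	OVS_KEY_ATTR_ETHERTYPE (outer TPID), OVS_KEY_ATTR_VLAN (outer TCI),
 *	OVS_KEY_ATTR_ENCAP {
 *		OVS_KEY_ATTR_ETHERTYPE (inner TPID), OVS_KEY_ATTR_VLAN (inner TCI),
 *		OVS_KEY_ATTR_ENCAP {
 *			OVS_KEY_ATTR_ETHERTYPE (payload type), L3/L4 attributes ...
 *		}
 *	}
 */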
2196
2197int ovs_nla_put_key(const struct sw_flow_key *swkey,
2198                    const struct sw_flow_key *output, int attr, bool is_mask,
2199                    struct sk_buff *skb)
2200{
2201        int err;
2202        struct nlattr *nla;
2203
2204        nla = nla_nest_start(skb, attr);
2205        if (!nla)
2206                return -EMSGSIZE;
2207        err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2208        if (err)
2209                return err;
2210        nla_nest_end(skb, nla);
2211
2212        return 0;
2213}
2214
2215/* Called with ovs_mutex or RCU read lock. */
2216int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2217{
2218        if (ovs_identifier_is_ufid(&flow->id))
2219                return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2220                               flow->id.ufid);
2221
2222        return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2223                               OVS_FLOW_ATTR_KEY, false, skb);
2224}
2225
2226/* Called with ovs_mutex or RCU read lock. */
2227int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2228{
2229        return ovs_nla_put_key(&flow->key, &flow->key,
2230                                OVS_FLOW_ATTR_KEY, false, skb);
2231}
2232
2233/* Called with ovs_mutex or RCU read lock. */
2234int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2235{
2236        return ovs_nla_put_key(&flow->key, &flow->mask->key,
2237                                OVS_FLOW_ATTR_MASK, true, skb);
2238}
2239
2240#define MAX_ACTIONS_BUFSIZE     (32 * 1024)
2241
2242static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2243{
2244        struct sw_flow_actions *sfa;
2245
2246        WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2247
2248        sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2249        if (!sfa)
2250                return ERR_PTR(-ENOMEM);
2251
2252        sfa->actions_len = 0;
2253        return sfa;
2254}
2255
2256static void ovs_nla_free_set_action(const struct nlattr *a)
2257{
2258        const struct nlattr *ovs_key = nla_data(a);
2259        struct ovs_tunnel_info *ovs_tun;
2260
2261        switch (nla_type(ovs_key)) {
2262        case OVS_KEY_ATTR_TUNNEL_INFO:
2263                ovs_tun = nla_data(ovs_key);
2264                dst_release((struct dst_entry *)ovs_tun->tun_dst);
2265                break;
2266        }
2267}
2268
2269void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2270{
2271        const struct nlattr *a;
2272        int rem;
2273
2274        if (!sf_acts)
2275                return;
2276
2277        nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
2278                switch (nla_type(a)) {
2279                case OVS_ACTION_ATTR_SET:
2280                        ovs_nla_free_set_action(a);
2281                        break;
2282                case OVS_ACTION_ATTR_CT:
2283                        ovs_ct_free_action(a);
2284                        break;
2285                }
2286        }
2287
2288        kfree(sf_acts);
2289}
2290
2291static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2292{
2293        ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2294}
2295
2296/* Schedules 'sf_acts' to be freed after the next RCU grace period.
2297 * The caller must hold rcu_read_lock for this to be sensible. */
2298void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2299{
2300        call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2301}
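/* Illustrative sketch: flow updates typically pair the deferred free above
 * with an rcu_assign_pointer() swap performed under ovs_mutex, roughly:
 *
 *	old_acts = ovsl_dereference(flow->sf_acts);
 *	rcu_assign_pointer(flow->sf_acts, new_acts);
 *	ovs_nla_free_flow_actions_rcu(old_acts);
 *
 * so readers still walking the old actions under rcu_read_lock() finish
 * before the memory is released.
 */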
2302
2303static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2304                                       int attr_len, bool log)
2305{
2306
2307        struct sw_flow_actions *acts;
2308        int new_acts_size;
2309        int req_size = NLA_ALIGN(attr_len);
2310        int next_offset = offsetof(struct sw_flow_actions, actions) +
2311                                        (*sfa)->actions_len;
2312
2313        if (req_size <= (ksize(*sfa) - next_offset))
2314                goto out;
2315
2316        new_acts_size = ksize(*sfa) * 2;
2317
2318        if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2319                if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2320                        OVS_NLERR(log, "Flow action size exceeds max %u",
2321                                  MAX_ACTIONS_BUFSIZE);
2322                        return ERR_PTR(-EMSGSIZE);
2323                }
2324                new_acts_size = MAX_ACTIONS_BUFSIZE;
2325        }
2326
2327        acts = nla_alloc_flow_actions(new_acts_size);
2328        if (IS_ERR(acts))
2329                return (void *)acts;
2330
2331        memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2332        acts->actions_len = (*sfa)->actions_len;
2333        acts->orig_len = (*sfa)->orig_len;
2334        kfree(*sfa);
2335        *sfa = acts;
2336
2337out:
2338        (*sfa)->actions_len += req_size;
2339        return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2340}
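/* Illustrative note: the actions buffer grows by doubling ksize(*sfa) and is
 * capped at MAX_ACTIONS_BUFSIZE; a reservation that cannot fit below the cap
 * fails with -EMSGSIZE rather than growing further.
 */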
2341
2342static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2343                                   int attrtype, void *data, int len, bool log)
2344{
2345        struct nlattr *a;
2346
2347        a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2348        if (IS_ERR(a))
2349                return a;
2350
2351        a->nla_type = attrtype;
2352        a->nla_len = nla_attr_size(len);
2353
2354        if (data)
2355                memcpy(nla_data(a), data, len);
2356        memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2357
2358        return a;
2359}
2360
2361int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2362                       int len, bool log)
2363{
2364        struct nlattr *a;
2365
2366        a = __add_action(sfa, attrtype, data, len, log);
2367
2368        return PTR_ERR_OR_ZERO(a);
2369}
2370
2371static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2372                                          int attrtype, bool log)
2373{
2374        int used = (*sfa)->actions_len;
2375        int err;
2376
2377        err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2378        if (err)
2379                return err;
2380
2381        return used;
2382}
2383
2384static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2385                                         int st_offset)
2386{
2387        struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2388                                                               st_offset);
2389
2390        a->nla_len = sfa->actions_len - st_offset;
2391}
2392
2393static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2394                                  const struct sw_flow_key *key,
2395                                  struct sw_flow_actions **sfa,
2396                                  __be16 eth_type, __be16 vlan_tci, bool log);
2397
2398static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2399                                    const struct sw_flow_key *key,
2400                                    struct sw_flow_actions **sfa,
2401                                    __be16 eth_type, __be16 vlan_tci,
2402                                    bool log, bool last)
2403{
2404        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2405        const struct nlattr *probability, *actions;
2406        const struct nlattr *a;
2407        int rem, start, err;
2408        struct sample_arg arg;
2409
2410        memset(attrs, 0, sizeof(attrs));
2411        nla_for_each_nested(a, attr, rem) {
2412                int type = nla_type(a);
2413                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2414                        return -EINVAL;
2415                attrs[type] = a;
2416        }
2417        if (rem)
2418                return -EINVAL;
2419
2420        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2421        if (!probability || nla_len(probability) != sizeof(u32))
2422                return -EINVAL;
2423
2424        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2425        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2426                return -EINVAL;
2427
2428        /* validation done, copy sample action. */
2429        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2430        if (start < 0)
2431                return start;
2432
2433        /* When both the skb and the flow may be changed, put the sample
2434         * into a deferred FIFO. On the other hand, if only the skb
2435         * may be modified, the actions can be executed in place.
2436         *
2437         * Do this analysis at flow installation time.
2438         * Set 'arg.exec' to true if the actions can be
2439         * executed without being deferred.
2440         *
2441         * If the sample is the last action, it can always be executed
2442         * rather than deferred.
2443         */
2444        arg.exec = last || !actions_may_change_flow(actions);
2445        arg.probability = nla_get_u32(probability);
2446
2447        err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2448                                 log);
2449        if (err)
2450                return err;
2451
2452        err = __ovs_nla_copy_actions(net, actions, key, sfa,
2453                                     eth_type, vlan_tci, log);
2454
2455        if (err)
2456                return err;
2457
2458        add_nested_action_end(*sfa, start);
2459
2460        return 0;
2461}
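/* Illustrative note: the validated copy stored in 'sfa' no longer carries the
 * userspace OVS_SAMPLE_ATTR_PROBABILITY/OVS_SAMPLE_ATTR_ACTIONS pair; roughly
 * it becomes:
 *
 *	OVS_ACTION_ATTR_SAMPLE {
 *		OVS_SAMPLE_ATTR_ARG : struct sample_arg { exec, probability }
 *		...copied nested actions...
 *	}
 *
 * where arg.exec decides at execution time whether the nested actions run in
 * place or are deferred.
 */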
2462
2463static int validate_and_copy_clone(struct net *net,
2464                                   const struct nlattr *attr,
2465                                   const struct sw_flow_key *key,
2466                                   struct sw_flow_actions **sfa,
2467                                   __be16 eth_type, __be16 vlan_tci,
2468                                   bool log, bool last)
2469{
2470        int start, err;
2471        u32 exec;
2472
2473        if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
2474                return -EINVAL;
2475
2476        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
2477        if (start < 0)
2478                return start;
2479
2480        exec = last || !actions_may_change_flow(attr);
2481
2482        err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
2483                                 sizeof(exec), log);
2484        if (err)
2485                return err;
2486
2487        err = __ovs_nla_copy_actions(net, attr, key, sfa,
2488                                     eth_type, vlan_tci, log);
2489        if (err)
2490                return err;
2491
2492        add_nested_action_end(*sfa, start);
2493
2494        return 0;
2495}
2496
2497void ovs_match_init(struct sw_flow_match *match,
2498                    struct sw_flow_key *key,
2499                    bool reset_key,
2500                    struct sw_flow_mask *mask)
2501{
2502        memset(match, 0, sizeof(*match));
2503        match->key = key;
2504        match->mask = mask;
2505
2506        if (reset_key)
2507                memset(key, 0, sizeof(*key));
2508
2509        if (mask) {
2510                memset(&mask->key, 0, sizeof(mask->key));
2511                mask->range.start = mask->range.end = 0;
2512        }
2513}
2514
2515static int validate_geneve_opts(struct sw_flow_key *key)
2516{
2517        struct geneve_opt *option;
2518        int opts_len = key->tun_opts_len;
2519        bool crit_opt = false;
2520
2521        option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2522        while (opts_len > 0) {
2523                int len;
2524
2525                if (opts_len < sizeof(*option))
2526                        return -EINVAL;
2527
2528                len = sizeof(*option) + option->length * 4;
2529                if (len > opts_len)
2530                        return -EINVAL;
2531
2532                crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2533
2534                option = (struct geneve_opt *)((u8 *)option + len);
2535                opts_len -= len;
2536        }
2537
2538        key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2539
2540        return 0;
2541}
2542
2543static int validate_and_copy_set_tun(const struct nlattr *attr,
2544                                     struct sw_flow_actions **sfa, bool log)
2545{
2546        struct sw_flow_match match;
2547        struct sw_flow_key key;
2548        struct metadata_dst *tun_dst;
2549        struct ip_tunnel_info *tun_info;
2550        struct ovs_tunnel_info *ovs_tun;
2551        struct nlattr *a;
2552        int err = 0, start, opts_type;
2553        __be16 dst_opt_type;
2554
2555        dst_opt_type = 0;
2556        ovs_match_init(&match, &key, true, NULL);
2557        opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2558        if (opts_type < 0)
2559                return opts_type;
2560
2561        if (key.tun_opts_len) {
2562                switch (opts_type) {
2563                case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2564                        err = validate_geneve_opts(&key);
2565                        if (err < 0)
2566                                return err;
2567                        dst_opt_type = TUNNEL_GENEVE_OPT;
2568                        break;
2569                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2570                        dst_opt_type = TUNNEL_VXLAN_OPT;
2571                        break;
2572                case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2573                        dst_opt_type = TUNNEL_ERSPAN_OPT;
2574                        break;
2575                }
2576        }
2577
2578        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2579        if (start < 0)
2580                return start;
2581
2582        tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2583                                     GFP_KERNEL);
2584
2585        if (!tun_dst)
2586                return -ENOMEM;
2587
2588        err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2589        if (err) {
2590                dst_release((struct dst_entry *)tun_dst);
2591                return err;
2592        }
2593
2594        a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2595                         sizeof(*ovs_tun), log);
2596        if (IS_ERR(a)) {
2597                dst_release((struct dst_entry *)tun_dst);
2598                return PTR_ERR(a);
2599        }
2600
2601        ovs_tun = nla_data(a);
2602        ovs_tun->tun_dst = tun_dst;
2603
2604        tun_info = &tun_dst->u.tun_info;
2605        tun_info->mode = IP_TUNNEL_INFO_TX;
2606        if (key.tun_proto == AF_INET6)
2607                tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2608        tun_info->key = key.tun_key;
2609
2610        /* We need to store the options in the action itself since
2611         * everything else will go away after flow setup. We can append
2612         * them to tun_info and then point there.
2613         */
2614        ip_tunnel_info_opts_set(tun_info,
2615                                TUN_METADATA_OPTS(&key, key.tun_opts_len),
2616                                key.tun_opts_len, dst_opt_type);
2617        add_nested_action_end(*sfa, start);
2618
2619        return err;
2620}
2621
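    /* Check NSH key attributes by parsing them into a scratch flow match;
     * returns true if the attribute is acceptable.
     */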
2622static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2623                         bool is_push_nsh, bool log)
2624{
2625        struct sw_flow_match match;
2626        struct sw_flow_key key;
2627        int ret = 0;
2628
2629        ovs_match_init(&match, &key, true, NULL);
2630        ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2631                                      is_push_nsh, log);
2632        return !ret;
2633}
2634
2635/* Return false if there are any non-masked bits set.
2636 * Mask follows data immediately, before any netlink padding.
2637 */
2638static bool validate_masked(u8 *data, int len)
2639{
2640        u8 *mask = data + len;
2641
2642        while (len--)
2643                if (*data++ & ~*mask++)
2644                        return false;
2645
2646        return true;
2647}
2648
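    /* Validate a single set() or masked set() action against the (masked)
     * flow key.  A masked set carries the value immediately followed by the
     * mask inside one nested key attribute, roughly (sketch):
     *
     *   OVS_ACTION_ATTR_SET_MASKED
     *     OVS_KEY_ATTR_IPV4: { value bytes..., mask bytes... }
     *
     * Tunnel sets are translated by validate_and_copy_set_tun(); all other
     * non-masked sets are rewritten below into masked sets with an all-ones
     * mask so the datapath only has to handle one form.
     */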
2649static int validate_set(const struct nlattr *a,
2650                        const struct sw_flow_key *flow_key,
2651                        struct sw_flow_actions **sfa, bool *skip_copy,
2652                        u8 mac_proto, __be16 eth_type, bool masked, bool log)
2653{
2654        const struct nlattr *ovs_key = nla_data(a);
2655        int key_type = nla_type(ovs_key);
2656        size_t key_len;
2657
2658        /* There can be only one key in an action. */
2659        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2660                return -EINVAL;
2661
2662        key_len = nla_len(ovs_key);
2663        if (masked)
2664                key_len /= 2;
2665
2666        if (key_type > OVS_KEY_ATTR_MAX ||
2667            !check_attr_len(key_len, ovs_key_lens[key_type].len))
2668                return -EINVAL;
2669
2670        if (masked && !validate_masked(nla_data(ovs_key), key_len))
2671                return -EINVAL;
2672
2673        switch (key_type) {
2674        const struct ovs_key_ipv4 *ipv4_key;
2675        const struct ovs_key_ipv6 *ipv6_key;
2676        int err;
2677
2678        case OVS_KEY_ATTR_PRIORITY:
2679        case OVS_KEY_ATTR_SKB_MARK:
2680        case OVS_KEY_ATTR_CT_MARK:
2681        case OVS_KEY_ATTR_CT_LABELS:
2682                break;
2683
2684        case OVS_KEY_ATTR_ETHERNET:
2685                if (mac_proto != MAC_PROTO_ETHERNET)
2686                        return -EINVAL;
2687                break;
2688
2689        case OVS_KEY_ATTR_TUNNEL:
2690                if (masked)
2691                        return -EINVAL; /* Masked tunnel set not supported. */
2692
2693                *skip_copy = true;
2694                err = validate_and_copy_set_tun(a, sfa, log);
2695                if (err)
2696                        return err;
2697                break;
2698
2699        case OVS_KEY_ATTR_IPV4:
2700                if (eth_type != htons(ETH_P_IP))
2701                        return -EINVAL;
2702
2703                ipv4_key = nla_data(ovs_key);
2704
2705                if (masked) {
2706                        const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2707
2708                        /* Non-writeable fields. */
2709                        if (mask->ipv4_proto || mask->ipv4_frag)
2710                                return -EINVAL;
2711                } else {
2712                        if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2713                                return -EINVAL;
2714
2715                        if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2716                                return -EINVAL;
2717                }
2718                break;
2719
2720        case OVS_KEY_ATTR_IPV6:
2721                if (eth_type != htons(ETH_P_IPV6))
2722                        return -EINVAL;
2723
2724                ipv6_key = nla_data(ovs_key);
2725
2726                if (masked) {
2727                        const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2728
2729                        /* Non-writeable fields. */
2730                        if (mask->ipv6_proto || mask->ipv6_frag)
2731                                return -EINVAL;
2732
2733                        /* Invalid bits in the flow label mask? */
2734                        if (ntohl(mask->ipv6_label) & 0xFFF00000)
2735                                return -EINVAL;
2736                } else {
2737                        if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2738                                return -EINVAL;
2739
2740                        if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2741                                return -EINVAL;
2742                }
2743                if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2744                        return -EINVAL;
2745
2746                break;
2747
2748        case OVS_KEY_ATTR_TCP:
2749                if ((eth_type != htons(ETH_P_IP) &&
2750                     eth_type != htons(ETH_P_IPV6)) ||
2751                    flow_key->ip.proto != IPPROTO_TCP)
2752                        return -EINVAL;
2753
2754                break;
2755
2756        case OVS_KEY_ATTR_UDP:
2757                if ((eth_type != htons(ETH_P_IP) &&
2758                     eth_type != htons(ETH_P_IPV6)) ||
2759                    flow_key->ip.proto != IPPROTO_UDP)
2760                        return -EINVAL;
2761
2762                break;
2763
2764        case OVS_KEY_ATTR_MPLS:
2765                if (!eth_p_mpls(eth_type))
2766                        return -EINVAL;
2767                break;
2768
2769        case OVS_KEY_ATTR_SCTP:
2770                if ((eth_type != htons(ETH_P_IP) &&
2771                     eth_type != htons(ETH_P_IPV6)) ||
2772                    flow_key->ip.proto != IPPROTO_SCTP)
2773                        return -EINVAL;
2774
2775                break;
2776
2777        case OVS_KEY_ATTR_NSH:
2778                if (eth_type != htons(ETH_P_NSH))
2779                        return -EINVAL;
2780                if (!validate_nsh(nla_data(a), masked, false, log))
2781                        return -EINVAL;
2782                break;
2783
2784        default:
2785                return -EINVAL;
2786        }
2787
2788        /* Convert non-masked non-tunnel set actions to masked set actions. */
2789        if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2790                int start, len = key_len * 2;
2791                struct nlattr *at;
2792
2793                *skip_copy = true;
2794
2795                start = add_nested_action_start(sfa,
2796                                                OVS_ACTION_ATTR_SET_TO_MASKED,
2797                                                log);
2798                if (start < 0)
2799                        return start;
2800
2801                at = __add_action(sfa, key_type, NULL, len, log);
2802                if (IS_ERR(at))
2803                        return PTR_ERR(at);
2804
2805                memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2806                memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
2807                /* Clear non-writeable bits from otherwise writeable fields. */
2808                if (key_type == OVS_KEY_ATTR_IPV6) {
2809                        struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2810
2811                        mask->ipv6_label &= htonl(0x000FFFFF);
2812                }
2813                add_nested_action_end(*sfa, start);
2814        }
2815
2816        return 0;
2817}
2818
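    /* Check that a userspace (upcall) action carries a valid, non-zero
     * Netlink PID to deliver the packet to.
     */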
2819static int validate_userspace(const struct nlattr *attr)
2820{
2821        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2822                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2823                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2824                [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
                    [OVS_USERSPACE_ATTR_ACTIONS] = {.type = NLA_FLAG },
2825        };
2826        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2827        int error;
2828
2829        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
2830                                 userspace_policy, NULL);
2831        if (error)
2832                return error;
2833
2834        if (!a[OVS_USERSPACE_ATTR_PID] ||
2835            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2836                return -EINVAL;
2837
2838        return 0;
2839}
2840
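    /* Append one action attribute verbatim (rounded up to NLA_ALIGN) to the
     * flow's action buffer.
     */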
2841static int copy_action(const struct nlattr *from,
2842                       struct sw_flow_actions **sfa, bool log)
2843{
2844        int totlen = NLA_ALIGN(from->nla_len);
2845        struct nlattr *to;
2846
2847        to = reserve_sfa_size(sfa, from->nla_len, log);
2848        if (IS_ERR(to))
2849                return PTR_ERR(to);
2850
2851        memcpy(to, from, totlen);
2852        return 0;
2853}
2854
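    /* Validate each action against the flow key and the ethertype/VLAN state
     * accumulated so far, copying the (possibly rewritten) actions into
     * '*sfa'.  Cases that emit a translated, kernel-internal form set
     * 'skip_copy' so the original attribute is not copied as well.
     */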
2855static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2856                                  const struct sw_flow_key *key,
2857                                  struct sw_flow_actions **sfa,
2858                                  __be16 eth_type, __be16 vlan_tci, bool log)
2859{
2860        u8 mac_proto = ovs_key_mac_proto(key);
2861        const struct nlattr *a;
2862        int rem, err;
2863
2864        nla_for_each_nested(a, attr, rem) {
2865                /* Expected argument lengths, (u32)-1 for variable length. */
2866                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2867                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2868                        [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2869                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2870                        [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2871                        [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2872                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2873                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
2874                        [OVS_ACTION_ATTR_SET] = (u32)-1,
2875                        [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2876                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2877                        [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2878                        [OVS_ACTION_ATTR_CT] = (u32)-1,
2879                        [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2880                        [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2881                        [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2882                        [OVS_ACTION_ATTR_POP_ETH] = 0,
2883                        [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2884                        [OVS_ACTION_ATTR_POP_NSH] = 0,
2885                        [OVS_ACTION_ATTR_METER] = sizeof(u32),
2886                        [OVS_ACTION_ATTR_CLONE] = (u32)-1,
2887                };
2888                const struct ovs_action_push_vlan *vlan;
2889                int type = nla_type(a);
2890                bool skip_copy;
2891
2892                if (type > OVS_ACTION_ATTR_MAX ||
2893                    (action_lens[type] != nla_len(a) &&
2894                     action_lens[type] != (u32)-1))
2895                        return -EINVAL;
2896
2897                skip_copy = false;
2898                switch (type) {
2899                case OVS_ACTION_ATTR_UNSPEC:
2900                        return -EINVAL;
2901
2902                case OVS_ACTION_ATTR_USERSPACE:
2903                        err = validate_userspace(a);
2904                        if (err)
2905                                return err;
2906                        break;
2907
2908                case OVS_ACTION_ATTR_OUTPUT:
2909                        if (nla_get_u32(a) >= DP_MAX_PORTS)
2910                                return -EINVAL;
2911                        break;
2912
2913                case OVS_ACTION_ATTR_TRUNC: {
2914                        const struct ovs_action_trunc *trunc = nla_data(a);
2915
2916                        if (trunc->max_len < ETH_HLEN)
2917                                return -EINVAL;
2918                        break;
2919                }
2920
2921                case OVS_ACTION_ATTR_HASH: {
2922                        const struct ovs_action_hash *act_hash = nla_data(a);
2923
2924                        switch (act_hash->hash_alg) {
2925                        case OVS_HASH_ALG_L4:
2926                                break;
2927                        default:
2928                                return -EINVAL;
2929                        }
2930
2931                        break;
2932                }
2933
2934                case OVS_ACTION_ATTR_POP_VLAN:
2935                        if (mac_proto != MAC_PROTO_ETHERNET)
2936                                return -EINVAL;
2937                        vlan_tci = htons(0);
2938                        break;
2939
2940                case OVS_ACTION_ATTR_PUSH_VLAN:
2941                        if (mac_proto != MAC_PROTO_ETHERNET)
2942                                return -EINVAL;
2943                        vlan = nla_data(a);
2944                        if (!eth_type_vlan(vlan->vlan_tpid))
2945                                return -EINVAL;
2946                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
2947                                return -EINVAL;
2948                        vlan_tci = vlan->vlan_tci;
2949                        break;
2950
2951                case OVS_ACTION_ATTR_RECIRC:
2952                        break;
2953
2954                case OVS_ACTION_ATTR_PUSH_MPLS: {
2955                        const struct ovs_action_push_mpls *mpls = nla_data(a);
2956
2957                        if (!eth_p_mpls(mpls->mpls_ethertype))
2958                                return -EINVAL;
2959                        /* Only allow push MPLS when the packet's current
2960                         * ethertype is one for which the tag order is known.
2961                         */
2962                        if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2963                            (eth_type != htons(ETH_P_IP) &&
2964                             eth_type != htons(ETH_P_IPV6) &&
2965                             eth_type != htons(ETH_P_ARP) &&
2966                             eth_type != htons(ETH_P_RARP) &&
2967                             !eth_p_mpls(eth_type)))
2968                                return -EINVAL;
2969                        eth_type = mpls->mpls_ethertype;
2970                        break;
2971                }
2972
2973                case OVS_ACTION_ATTR_POP_MPLS:
2974                        if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2975                            !eth_p_mpls(eth_type))
2976                                return -EINVAL;
2977
2978                        /* Disallow subsequent L2.5+ set and mpls_pop actions
2979                         * as there is no check here to ensure that the new
2980                         * eth_type is valid and thus set actions could
2981                         * write off the end of the packet or otherwise
2982                         * corrupt it.
2983                         *
2984                         * Support for these actions is planned using packet
2985                         * recirculation.
2986                         */
2987                        eth_type = htons(0);
2988                        break;
2989
2990                case OVS_ACTION_ATTR_SET:
2991                        err = validate_set(a, key, sfa,
2992                                           &skip_copy, mac_proto, eth_type,
2993                                           false, log);
2994                        if (err)
2995                                return err;
2996                        break;
2997
2998                case OVS_ACTION_ATTR_SET_MASKED:
2999                        err = validate_set(a, key, sfa,
3000                                           &skip_copy, mac_proto, eth_type,
3001                                           true, log);
3002                        if (err)
3003                                return err;
3004                        break;
3005
3006                case OVS_ACTION_ATTR_SAMPLE: {
3007                        bool last = nla_is_last(a, rem);
3008
3009                        err = validate_and_copy_sample(net, a, key, sfa,
3010                                                       eth_type, vlan_tci,
3011                                                       log, last);
3012                        if (err)
3013                                return err;
3014                        skip_copy = true;
3015                        break;
3016                }
3017
3018                case OVS_ACTION_ATTR_CT:
3019                        err = ovs_ct_copy_action(net, a, key, sfa, log);
3020                        if (err)
3021                                return err;
3022                        skip_copy = true;
3023                        break;
3024
3025                case OVS_ACTION_ATTR_CT_CLEAR:
3026                        break;
3027
3028                case OVS_ACTION_ATTR_PUSH_ETH:
3029                        /* Disallow pushing an Ethernet header if one
3030                         * is already present. */
3031                        if (mac_proto != MAC_PROTO_NONE)
3032                                return -EINVAL;
3033                        mac_proto = MAC_PROTO_ETHERNET;
3034                        break;
3035
3036                case OVS_ACTION_ATTR_POP_ETH:
3037                        if (mac_proto != MAC_PROTO_ETHERNET)
3038                                return -EINVAL;
3039                        if (vlan_tci & htons(VLAN_TAG_PRESENT))
3040                                return -EINVAL;
3041                        mac_proto = MAC_PROTO_NONE;
3042                        break;
3043
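                    /* Pushing NSH onto an L3 packet is only allowed when the
                     * current ethertype maps to an NSH next-protocol value.
                     */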
3044                case OVS_ACTION_ATTR_PUSH_NSH:
3045                        if (mac_proto != MAC_PROTO_ETHERNET) {
3046                                u8 next_proto;
3047
3048                                next_proto = tun_p_from_eth_p(eth_type);
3049                                if (!next_proto)
3050                                        return -EINVAL;
3051                        }
3052                        mac_proto = MAC_PROTO_NONE;
3053                        if (!validate_nsh(nla_data(a), false, true, true))
3054                                return -EINVAL;
3055                        break;
3056
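                    /* After popping the NSH header the inner packet type comes
                     * from the NSH next-protocol field: an Ethernet payload
                     * restores an L2 mac_proto, anything else continues as L3.
                     */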
3057                case OVS_ACTION_ATTR_POP_NSH: {
3058                        __be16 inner_proto;
3059
3060                        if (eth_type != htons(ETH_P_NSH))
3061                                return -EINVAL;
3062                        inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3063                        if (!inner_proto)
3064                                return -EINVAL;
3065                        if (key->nsh.base.np == TUN_P_ETHERNET)
3066                                mac_proto = MAC_PROTO_ETHERNET;
3067                        else
3068                                mac_proto = MAC_PROTO_NONE;
3069                        break;
3070                }
3071
3072                case OVS_ACTION_ATTR_METER:
3073                        /* Non-existent meters are simply ignored.  */
3074                        break;
3075
3076                case OVS_ACTION_ATTR_CLONE: {
3077                        bool last = nla_is_last(a, rem);
3078
3079                        err = validate_and_copy_clone(net, a, key, sfa,
3080                                                      eth_type, vlan_tci,
3081                                                      log, last);
3082                        if (err)
3083                                return err;
3084                        skip_copy = true;
3085                        break;
3086                }
3087
3088                default:
3089                        OVS_NLERR(log, "Unknown Action type %d", type);
3090                        return -EINVAL;
3091                }
3092                if (!skip_copy) {
3093                        err = copy_action(a, sfa, log);
3094                        if (err)
3095                                return err;
3096                }
3097        }
3098
3099        if (rem > 0)
3100                return -EINVAL;
3101
3102        return 0;
3103}
3104
3105/* 'key' must be the masked key. */
3106int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3107                         const struct sw_flow_key *key,
3108                         struct sw_flow_actions **sfa, bool log)
3109{
3110        int err;
3111
3112        *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3113        if (IS_ERR(*sfa))
3114                return PTR_ERR(*sfa);
3115
3116        (*sfa)->orig_len = nla_len(attr);
3117        err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3118                                     key->eth.vlan.tci, log);
3119        if (err)
3120                ovs_nla_free_flow_actions(*sfa);
3121
3122        return err;
3123}
3124
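    /* Convert the internal sample action (struct sample_arg followed by the
     * nested actions) back into the OVS_SAMPLE_ATTR_* layout expected by
     * userspace, roughly (sketch):
     *
     *   OVS_ACTION_ATTR_SAMPLE
     *     OVS_SAMPLE_ATTR_PROBABILITY: u32
     *     OVS_SAMPLE_ATTR_ACTIONS: nested actions
     */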
3125static int sample_action_to_attr(const struct nlattr *attr,
3126                                 struct sk_buff *skb)
3127{
3128        struct nlattr *start, *ac_start = NULL, *sample_arg;
3129        int err = 0, rem = nla_len(attr);
3130        const struct sample_arg *arg;
3131        struct nlattr *actions;
3132
3133        start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
3134        if (!start)
3135                return -EMSGSIZE;
3136
3137        sample_arg = nla_data(attr);
3138        arg = nla_data(sample_arg);
3139        actions = nla_next(sample_arg, &rem);
3140
3141        if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3142                err = -EMSGSIZE;
3143                goto out;
3144        }
3145
3146        ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
3147        if (!ac_start) {
3148                err = -EMSGSIZE;
3149                goto out;
3150        }
3151
3152        err = ovs_nla_put_actions(actions, rem, skb);
3153
3154out:
3155        if (err) {
3156                nla_nest_cancel(skb, ac_start);
3157                nla_nest_cancel(skb, start);
3158        } else {
3159                nla_nest_end(skb, ac_start);
3160                nla_nest_end(skb, start);
3161        }
3162
3163        return err;
3164}
3165
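    /* Re-emit the copied clone() actions inside an OVS_ACTION_ATTR_CLONE
     * nest when dumping a flow back to userspace.
     */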
3166static int clone_action_to_attr(const struct nlattr *attr,
3167                                struct sk_buff *skb)
3168{
3169        struct nlattr *start;
3170        int err = 0, rem = nla_len(attr);
3171
3172        start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
3173        if (!start)
3174                return -EMSGSIZE;
3175
3176        err = ovs_nla_put_actions(nla_data(attr), rem, skb);
3177
3178        if (err)
3179                nla_nest_cancel(skb, start);
3180        else
3181                nla_nest_end(skb, start);
3182
3183        return err;
3184}
3185
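    /* Dump a set() action.  Tunnel sets were stored internally as an
     * OVS_KEY_ATTR_TUNNEL_INFO holding a metadata dst, so they are converted
     * back into OVS_TUNNEL_KEY_ATTR_* attributes here; other keys are echoed
     * as-is.
     */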
3186static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3187{
3188        const struct nlattr *ovs_key = nla_data(a);
3189        int key_type = nla_type(ovs_key);
3190        struct nlattr *start;
3191        int err;
3192
3193        switch (key_type) {
3194        case OVS_KEY_ATTR_TUNNEL_INFO: {
3195                struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3196                struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3197
3198                start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3199                if (!start)
3200                        return -EMSGSIZE;
3201
3202                err =  ip_tun_to_nlattr(skb, &tun_info->key,
3203                                        ip_tunnel_info_opts(tun_info),
3204                                        tun_info->options_len,
3205                                        ip_tunnel_info_af(tun_info));
3206                if (err)
3207                        return err;
3208                nla_nest_end(skb, start);
3209                break;
3210        }
3211        default:
3212                if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3213                        return -EMSGSIZE;
3214                break;
3215        }
3216
3217        return 0;
3218}
3219
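    /* Dump a kernel-internal SET_TO_MASKED action (created from a plain
     * set() in validate_set()) as the OVS_ACTION_ATTR_SET that userspace
     * originally supplied, dropping the appended mask half.
     */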
3220static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3221                                                struct sk_buff *skb)
3222{
3223        const struct nlattr *ovs_key = nla_data(a);
3224        struct nlattr *nla;
3225        size_t key_len = nla_len(ovs_key) / 2;
3226
3227        /* Revert the conversion we did from a non-masked set action to
3228         * a masked set action.
3229         */
3230        nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3231        if (!nla)
3232                return -EMSGSIZE;
3233
3234        if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3235                return -EMSGSIZE;
3236
3237        nla_nest_end(skb, nla);
3238        return 0;
3239}
3240
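    /* Translate the kernel's internal action list back into the netlink
     * attributes exposed to userspace when a flow is dumped.
     */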
3241int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3242{
3243        const struct nlattr *a;
3244        int rem, err;
3245
3246        nla_for_each_attr(a, attr, len, rem) {
3247                int type = nla_type(a);
3248
3249                switch (type) {
3250                case OVS_ACTION_ATTR_SET:
3251                        err = set_action_to_attr(a, skb);
3252                        if (err)
3253                                return err;
3254                        break;
3255
3256                case OVS_ACTION_ATTR_SET_TO_MASKED:
3257                        err = masked_set_action_to_set_action_attr(a, skb);
3258                        if (err)
3259                                return err;
3260                        break;
3261
3262                case OVS_ACTION_ATTR_SAMPLE:
3263                        err = sample_action_to_attr(a, skb);
3264                        if (err)
3265                                return err;
3266                        break;
3267
3268                case OVS_ACTION_ATTR_CT:
3269                        err = ovs_ct_action_to_attr(nla_data(a), skb);
3270                        if (err)
3271                                return err;
3272                        break;
3273
3274                case OVS_ACTION_ATTR_CLONE:
3275                        err = clone_action_to_attr(a, skb);
3276                        if (err)
3277                                return err;
3278                        break;
3279
3280                default:
3281                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
3282                                return -EMSGSIZE;
3283                        break;
3284                }
3285        }
3286
3287        return 0;
3288}
3289