linux/net/openvswitch/flow.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2007-2013 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#include "flow.h"
  20#include "datapath.h"
  21#include <linux/uaccess.h>
  22#include <linux/netdevice.h>
  23#include <linux/etherdevice.h>
  24#include <linux/if_ether.h>
  25#include <linux/if_vlan.h>
  26#include <net/llc_pdu.h>
  27#include <linux/kernel.h>
  28#include <linux/jhash.h>
  29#include <linux/jiffies.h>
  30#include <linux/llc.h>
  31#include <linux/module.h>
  32#include <linux/in.h>
  33#include <linux/rcupdate.h>
  34#include <linux/if_arp.h>
  35#include <linux/ip.h>
  36#include <linux/ipv6.h>
  37#include <linux/sctp.h>
  38#include <linux/tcp.h>
  39#include <linux/udp.h>
  40#include <linux/icmp.h>
  41#include <linux/icmpv6.h>
  42#include <linux/rculist.h>
  43#include <net/ip.h>
  44#include <net/ip_tunnels.h>
  45#include <net/ipv6.h>
  46#include <net/ndisc.h>
  47
/* Slab cache from which ovs_flow_alloc() takes struct sw_flow objects and
 * to which __flow_free() returns them.
 */
static struct kmem_cache *flow_cache;

/* Forward declaration so the function can be referenced before its
 * definition appears in this file.
 */
static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
                struct sw_flow_key_range *range, u8 val);
  52
  53static void update_range__(struct sw_flow_match *match,
  54                          size_t offset, size_t size, bool is_mask)
  55{
  56        struct sw_flow_key_range *range = NULL;
  57        size_t start = rounddown(offset, sizeof(long));
  58        size_t end = roundup(offset + size, sizeof(long));
  59
  60        if (!is_mask)
  61                range = &match->range;
  62        else if (match->mask)
  63                range = &match->mask->range;
  64
  65        if (!range)
  66                return;
  67
  68        if (range->start == range->end) {
  69                range->start = start;
  70                range->end = end;
  71                return;
  72        }
  73
  74        if (range->start > start)
  75                range->start = start;
  76
  77        if (range->end < end)
  78                range->end = end;
  79}
  80
  81#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
  82        do { \
  83                update_range__(match, offsetof(struct sw_flow_key, field),  \
  84                                     sizeof((match)->key->field), is_mask); \
  85                if (is_mask) {                                              \
  86                        if ((match)->mask)                                  \
  87                                (match)->mask->key.field = value;           \
  88                } else {                                                    \
  89                        (match)->key->field = value;                        \
  90                }                                                           \
  91        } while (0)
  92
  93#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
  94        do { \
  95                update_range__(match, offsetof(struct sw_flow_key, field),  \
  96                                len, is_mask);                              \
  97                if (is_mask) {                                              \
  98                        if ((match)->mask)                                  \
  99                                memcpy(&(match)->mask->key.field, value_p, len);\
 100                } else {                                                    \
 101                        memcpy(&(match)->key->field, value_p, len);         \
 102                }                                                           \
 103        } while (0)
 104
 105static u16 range_n_bytes(const struct sw_flow_key_range *range)
 106{
 107        return range->end - range->start;
 108}
 109
 110void ovs_match_init(struct sw_flow_match *match,
 111                    struct sw_flow_key *key,
 112                    struct sw_flow_mask *mask)
 113{
 114        memset(match, 0, sizeof(*match));
 115        match->key = key;
 116        match->mask = mask;
 117
 118        memset(key, 0, sizeof(*key));
 119
 120        if (mask) {
 121                memset(&mask->key, 0, sizeof(mask->key));
 122                mask->range.start = mask->range.end = 0;
 123        }
 124}
 125
 126static bool ovs_match_validate(const struct sw_flow_match *match,
 127                u64 key_attrs, u64 mask_attrs)
 128{
 129        u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
 130        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 131
 132        /* The following mask attributes allowed only if they
 133         * pass the validation tests. */
 134        mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
 135                        | (1 << OVS_KEY_ATTR_IPV6)
 136                        | (1 << OVS_KEY_ATTR_TCP)
 137                        | (1 << OVS_KEY_ATTR_UDP)
 138                        | (1 << OVS_KEY_ATTR_SCTP)
 139                        | (1 << OVS_KEY_ATTR_ICMP)
 140                        | (1 << OVS_KEY_ATTR_ICMPV6)
 141                        | (1 << OVS_KEY_ATTR_ARP)
 142                        | (1 << OVS_KEY_ATTR_ND));
 143
 144        /* Always allowed mask fields. */
 145        mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
 146                       | (1 << OVS_KEY_ATTR_IN_PORT)
 147                       | (1 << OVS_KEY_ATTR_ETHERTYPE));
 148
 149        /* Check key attributes. */
 150        if (match->key->eth.type == htons(ETH_P_ARP)
 151                        || match->key->eth.type == htons(ETH_P_RARP)) {
 152                key_expected |= 1 << OVS_KEY_ATTR_ARP;
 153                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 154                        mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
 155        }
 156
 157        if (match->key->eth.type == htons(ETH_P_IP)) {
 158                key_expected |= 1 << OVS_KEY_ATTR_IPV4;
 159                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 160                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
 161
 162                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 163                        if (match->key->ip.proto == IPPROTO_UDP) {
 164                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 165                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 166                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 167                        }
 168
 169                        if (match->key->ip.proto == IPPROTO_SCTP) {
 170                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 171                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 172                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 173                        }
 174
 175                        if (match->key->ip.proto == IPPROTO_TCP) {
 176                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 177                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 178                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 179                        }
 180
 181                        if (match->key->ip.proto == IPPROTO_ICMP) {
 182                                key_expected |= 1 << OVS_KEY_ATTR_ICMP;
 183                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 184                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
 185                        }
 186                }
 187        }
 188
 189        if (match->key->eth.type == htons(ETH_P_IPV6)) {
 190                key_expected |= 1 << OVS_KEY_ATTR_IPV6;
 191                if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
 192                        mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
 193
 194                if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
 195                        if (match->key->ip.proto == IPPROTO_UDP) {
 196                                key_expected |= 1 << OVS_KEY_ATTR_UDP;
 197                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 198                                        mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
 199                        }
 200
 201                        if (match->key->ip.proto == IPPROTO_SCTP) {
 202                                key_expected |= 1 << OVS_KEY_ATTR_SCTP;
 203                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 204                                        mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
 205                        }
 206
 207                        if (match->key->ip.proto == IPPROTO_TCP) {
 208                                key_expected |= 1 << OVS_KEY_ATTR_TCP;
 209                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 210                                        mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
 211                        }
 212
 213                        if (match->key->ip.proto == IPPROTO_ICMPV6) {
 214                                key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
 215                                if (match->mask && (match->mask->key.ip.proto == 0xff))
 216                                        mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
 217
 218                                if (match->key->ipv6.tp.src ==
 219                                                htons(NDISC_NEIGHBOUR_SOLICITATION) ||
 220                                    match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 221                                        key_expected |= 1 << OVS_KEY_ATTR_ND;
 222                                        if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
 223                                                mask_allowed |= 1 << OVS_KEY_ATTR_ND;
 224                                }
 225                        }
 226                }
 227        }
 228
 229        if ((key_attrs & key_expected) != key_expected) {
 230                /* Key attributes check failed. */
 231                OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
 232                                key_attrs, key_expected);
 233                return false;
 234        }
 235
 236        if ((mask_attrs & mask_allowed) != mask_attrs) {
 237                /* Mask attributes check failed. */
 238                OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
 239                                mask_attrs, mask_allowed);
 240                return false;
 241        }
 242
 243        return true;
 244}
 245
 246static int check_header(struct sk_buff *skb, int len)
 247{
 248        if (unlikely(skb->len < len))
 249                return -EINVAL;
 250        if (unlikely(!pskb_may_pull(skb, len)))
 251                return -ENOMEM;
 252        return 0;
 253}
 254
 255static bool arphdr_ok(struct sk_buff *skb)
 256{
 257        return pskb_may_pull(skb, skb_network_offset(skb) +
 258                                  sizeof(struct arp_eth_header));
 259}
 260
 261static int check_iphdr(struct sk_buff *skb)
 262{
 263        unsigned int nh_ofs = skb_network_offset(skb);
 264        unsigned int ip_len;
 265        int err;
 266
 267        err = check_header(skb, nh_ofs + sizeof(struct iphdr));
 268        if (unlikely(err))
 269                return err;
 270
 271        ip_len = ip_hdrlen(skb);
 272        if (unlikely(ip_len < sizeof(struct iphdr) ||
 273                     skb->len < nh_ofs + ip_len))
 274                return -EINVAL;
 275
 276        skb_set_transport_header(skb, nh_ofs + ip_len);
 277        return 0;
 278}
 279
 280static bool tcphdr_ok(struct sk_buff *skb)
 281{
 282        int th_ofs = skb_transport_offset(skb);
 283        int tcp_len;
 284
 285        if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
 286                return false;
 287
 288        tcp_len = tcp_hdrlen(skb);
 289        if (unlikely(tcp_len < sizeof(struct tcphdr) ||
 290                     skb->len < th_ofs + tcp_len))
 291                return false;
 292
 293        return true;
 294}
 295
 296static bool udphdr_ok(struct sk_buff *skb)
 297{
 298        return pskb_may_pull(skb, skb_transport_offset(skb) +
 299                                  sizeof(struct udphdr));
 300}
 301
 302static bool sctphdr_ok(struct sk_buff *skb)
 303{
 304        return pskb_may_pull(skb, skb_transport_offset(skb) +
 305                                  sizeof(struct sctphdr));
 306}
 307
 308static bool icmphdr_ok(struct sk_buff *skb)
 309{
 310        return pskb_may_pull(skb, skb_transport_offset(skb) +
 311                                  sizeof(struct icmphdr));
 312}
 313
 314u64 ovs_flow_used_time(unsigned long flow_jiffies)
 315{
 316        struct timespec cur_ts;
 317        u64 cur_ms, idle_ms;
 318
 319        ktime_get_ts(&cur_ts);
 320        idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
 321        cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
 322                 cur_ts.tv_nsec / NSEC_PER_MSEC;
 323
 324        return cur_ms - idle_ms;
 325}
 326
 327static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 328{
 329        unsigned int nh_ofs = skb_network_offset(skb);
 330        unsigned int nh_len;
 331        int payload_ofs;
 332        struct ipv6hdr *nh;
 333        uint8_t nexthdr;
 334        __be16 frag_off;
 335        int err;
 336
 337        err = check_header(skb, nh_ofs + sizeof(*nh));
 338        if (unlikely(err))
 339                return err;
 340
 341        nh = ipv6_hdr(skb);
 342        nexthdr = nh->nexthdr;
 343        payload_ofs = (u8 *)(nh + 1) - skb->data;
 344
 345        key->ip.proto = NEXTHDR_NONE;
 346        key->ip.tos = ipv6_get_dsfield(nh);
 347        key->ip.ttl = nh->hop_limit;
 348        key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
 349        key->ipv6.addr.src = nh->saddr;
 350        key->ipv6.addr.dst = nh->daddr;
 351
 352        payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
 353        if (unlikely(payload_ofs < 0))
 354                return -EINVAL;
 355
 356        if (frag_off) {
 357                if (frag_off & htons(~0x7))
 358                        key->ip.frag = OVS_FRAG_TYPE_LATER;
 359                else
 360                        key->ip.frag = OVS_FRAG_TYPE_FIRST;
 361        }
 362
 363        nh_len = payload_ofs - nh_ofs;
 364        skb_set_transport_header(skb, nh_ofs + nh_len);
 365        key->ip.proto = nexthdr;
 366        return nh_len;
 367}
 368
 369static bool icmp6hdr_ok(struct sk_buff *skb)
 370{
 371        return pskb_may_pull(skb, skb_transport_offset(skb) +
 372                                  sizeof(struct icmp6hdr));
 373}
 374
 375void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
 376                       const struct sw_flow_mask *mask)
 377{
 378        const long *m = (long *)((u8 *)&mask->key + mask->range.start);
 379        const long *s = (long *)((u8 *)src + mask->range.start);
 380        long *d = (long *)((u8 *)dst + mask->range.start);
 381        int i;
 382
 383        /* The memory outside of the 'mask->range' are not set since
 384         * further operations on 'dst' only uses contents within
 385         * 'mask->range'.
 386         */
 387        for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
 388                *d++ = *s++ & *m++;
 389}
 390
 391#define TCP_FLAGS_OFFSET 13
 392#define TCP_FLAG_MASK 0x3f
 393
 394void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
 395{
 396        u8 tcp_flags = 0;
 397
 398        if ((flow->key.eth.type == htons(ETH_P_IP) ||
 399             flow->key.eth.type == htons(ETH_P_IPV6)) &&
 400            flow->key.ip.proto == IPPROTO_TCP &&
 401            likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
 402                u8 *tcp = (u8 *)tcp_hdr(skb);
 403                tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
 404        }
 405
 406        spin_lock(&flow->lock);
 407        flow->used = jiffies;
 408        flow->packet_count++;
 409        flow->byte_count += skb->len;
 410        flow->tcp_flags |= tcp_flags;
 411        spin_unlock(&flow->lock);
 412}
 413
 414struct sw_flow_actions *ovs_flow_actions_alloc(int size)
 415{
 416        struct sw_flow_actions *sfa;
 417
 418        if (size > MAX_ACTIONS_BUFSIZE)
 419                return ERR_PTR(-EINVAL);
 420
 421        sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
 422        if (!sfa)
 423                return ERR_PTR(-ENOMEM);
 424
 425        sfa->actions_len = 0;
 426        return sfa;
 427}
 428
 429struct sw_flow *ovs_flow_alloc(void)
 430{
 431        struct sw_flow *flow;
 432
 433        flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
 434        if (!flow)
 435                return ERR_PTR(-ENOMEM);
 436
 437        spin_lock_init(&flow->lock);
 438        flow->sf_acts = NULL;
 439        flow->mask = NULL;
 440
 441        return flow;
 442}
 443
 444static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
 445{
 446        hash = jhash_1word(hash, table->hash_seed);
 447        return flex_array_get(table->buckets,
 448                                (hash & (table->n_buckets - 1)));
 449}
 450
 451static struct flex_array *alloc_buckets(unsigned int n_buckets)
 452{
 453        struct flex_array *buckets;
 454        int i, err;
 455
 456        buckets = flex_array_alloc(sizeof(struct hlist_head),
 457                                   n_buckets, GFP_KERNEL);
 458        if (!buckets)
 459                return NULL;
 460
 461        err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
 462        if (err) {
 463                flex_array_free(buckets);
 464                return NULL;
 465        }
 466
 467        for (i = 0; i < n_buckets; i++)
 468                INIT_HLIST_HEAD((struct hlist_head *)
 469                                        flex_array_get(buckets, i));
 470
 471        return buckets;
 472}
 473
/* Release a bucket array obtained from alloc_buckets(). */
static void free_buckets(struct flex_array *buckets)
{
        flex_array_free(buckets);
}
 478
 479static struct flow_table *__flow_tbl_alloc(int new_size)
 480{
 481        struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
 482
 483        if (!table)
 484                return NULL;
 485
 486        table->buckets = alloc_buckets(new_size);
 487
 488        if (!table->buckets) {
 489                kfree(table);
 490                return NULL;
 491        }
 492        table->n_buckets = new_size;
 493        table->count = 0;
 494        table->node_ver = 0;
 495        table->keep_flows = false;
 496        get_random_bytes(&table->hash_seed, sizeof(u32));
 497        table->mask_list = NULL;
 498
 499        return table;
 500}
 501
 502static void __flow_tbl_destroy(struct flow_table *table)
 503{
 504        int i;
 505
 506        if (table->keep_flows)
 507                goto skip_flows;
 508
 509        for (i = 0; i < table->n_buckets; i++) {
 510                struct sw_flow *flow;
 511                struct hlist_head *head = flex_array_get(table->buckets, i);
 512                struct hlist_node *n;
 513                int ver = table->node_ver;
 514
 515                hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
 516                        hlist_del(&flow->hash_node[ver]);
 517                        ovs_flow_free(flow, false);
 518                }
 519        }
 520
 521        BUG_ON(!list_empty(table->mask_list));
 522        kfree(table->mask_list);
 523
 524skip_flows:
 525        free_buckets(table->buckets);
 526        kfree(table);
 527}
 528
 529struct flow_table *ovs_flow_tbl_alloc(int new_size)
 530{
 531        struct flow_table *table = __flow_tbl_alloc(new_size);
 532
 533        if (!table)
 534                return NULL;
 535
 536        table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
 537        if (!table->mask_list) {
 538                table->keep_flows = true;
 539                __flow_tbl_destroy(table);
 540                return NULL;
 541        }
 542        INIT_LIST_HEAD(table->mask_list);
 543
 544        return table;
 545}
 546
 547static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 548{
 549        struct flow_table *table = container_of(rcu, struct flow_table, rcu);
 550
 551        __flow_tbl_destroy(table);
 552}
 553
 554void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
 555{
 556        if (!table)
 557                return;
 558
 559        if (deferred)
 560                call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
 561        else
 562                __flow_tbl_destroy(table);
 563}
 564
 565struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
 566{
 567        struct sw_flow *flow;
 568        struct hlist_head *head;
 569        int ver;
 570        int i;
 571
 572        ver = table->node_ver;
 573        while (*bucket < table->n_buckets) {
 574                i = 0;
 575                head = flex_array_get(table->buckets, *bucket);
 576                hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
 577                        if (i < *last) {
 578                                i++;
 579                                continue;
 580                        }
 581                        *last = i + 1;
 582                        return flow;
 583                }
 584                (*bucket)++;
 585                *last = 0;
 586        }
 587
 588        return NULL;
 589}
 590
 591static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
 592{
 593        struct hlist_head *head;
 594
 595        head = find_bucket(table, flow->hash);
 596        hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
 597
 598        table->count++;
 599}
 600
 601static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
 602{
 603        int old_ver;
 604        int i;
 605
 606        old_ver = old->node_ver;
 607        new->node_ver = !old_ver;
 608
 609        /* Insert in new table. */
 610        for (i = 0; i < old->n_buckets; i++) {
 611                struct sw_flow *flow;
 612                struct hlist_head *head;
 613
 614                head = flex_array_get(old->buckets, i);
 615
 616                hlist_for_each_entry(flow, head, hash_node[old_ver])
 617                        __tbl_insert(new, flow);
 618        }
 619
 620        new->mask_list = old->mask_list;
 621        old->keep_flows = true;
 622}
 623
 624static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
 625{
 626        struct flow_table *new_table;
 627
 628        new_table = __flow_tbl_alloc(n_buckets);
 629        if (!new_table)
 630                return ERR_PTR(-ENOMEM);
 631
 632        flow_table_copy_flows(table, new_table);
 633
 634        return new_table;
 635}
 636
 637struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
 638{
 639        return __flow_tbl_rehash(table, table->n_buckets);
 640}
 641
 642struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
 643{
 644        return __flow_tbl_rehash(table, table->n_buckets * 2);
 645}
 646
 647static void __flow_free(struct sw_flow *flow)
 648{
 649        kfree((struct sf_flow_acts __force *)flow->sf_acts);
 650        kmem_cache_free(flow_cache, flow);
 651}
 652
 653static void rcu_free_flow_callback(struct rcu_head *rcu)
 654{
 655        struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
 656
 657        __flow_free(flow);
 658}
 659
 660void ovs_flow_free(struct sw_flow *flow, bool deferred)
 661{
 662        if (!flow)
 663                return;
 664
 665        ovs_sw_flow_mask_del_ref(flow->mask, deferred);
 666
 667        if (deferred)
 668                call_rcu(&flow->rcu, rcu_free_flow_callback);
 669        else
 670                __flow_free(flow);
 671}
 672
/* Schedules 'sf_acts' to be freed after the next RCU grace period, using
 * the 'rcu' member embedded in the actions structure.
 * The caller must hold rcu_read_lock for this to be sensible. */
void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
        kfree_rcu(sf_acts, rcu);
}
 679
 680static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 681{
 682        struct qtag_prefix {
 683                __be16 eth_type; /* ETH_P_8021Q */
 684                __be16 tci;
 685        };
 686        struct qtag_prefix *qp;
 687
 688        if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
 689                return 0;
 690
 691        if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
 692                                         sizeof(__be16))))
 693                return -ENOMEM;
 694
 695        qp = (struct qtag_prefix *) skb->data;
 696        key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
 697        __skb_pull(skb, sizeof(struct qtag_prefix));
 698
 699        return 0;
 700}
 701
 702static __be16 parse_ethertype(struct sk_buff *skb)
 703{
 704        struct llc_snap_hdr {
 705                u8  dsap;  /* Always 0xAA */
 706                u8  ssap;  /* Always 0xAA */
 707                u8  ctrl;
 708                u8  oui[3];
 709                __be16 ethertype;
 710        };
 711        struct llc_snap_hdr *llc;
 712        __be16 proto;
 713
 714        proto = *(__be16 *) skb->data;
 715        __skb_pull(skb, sizeof(__be16));
 716
 717        if (ntohs(proto) >= ETH_P_802_3_MIN)
 718                return proto;
 719
 720        if (skb->len < sizeof(struct llc_snap_hdr))
 721                return htons(ETH_P_802_2);
 722
 723        if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
 724                return htons(0);
 725
 726        llc = (struct llc_snap_hdr *) skb->data;
 727        if (llc->dsap != LLC_SAP_SNAP ||
 728            llc->ssap != LLC_SAP_SNAP ||
 729            (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
 730                return htons(ETH_P_802_2);
 731
 732        __skb_pull(skb, sizeof(struct llc_snap_hdr));
 733
 734        if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
 735                return llc->ethertype;
 736
 737        return htons(ETH_P_802_2);
 738}
 739
 740static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 741                        int nh_len)
 742{
 743        struct icmp6hdr *icmp = icmp6_hdr(skb);
 744
 745        /* The ICMPv6 type and code fields use the 16-bit transport port
 746         * fields, so we need to store them in 16-bit network byte order.
 747         */
 748        key->ipv6.tp.src = htons(icmp->icmp6_type);
 749        key->ipv6.tp.dst = htons(icmp->icmp6_code);
 750
 751        if (icmp->icmp6_code == 0 &&
 752            (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
 753             icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 754                int icmp_len = skb->len - skb_transport_offset(skb);
 755                struct nd_msg *nd;
 756                int offset;
 757
 758                /* In order to process neighbor discovery options, we need the
 759                 * entire packet.
 760                 */
 761                if (unlikely(icmp_len < sizeof(*nd)))
 762                        return 0;
 763
 764                if (unlikely(skb_linearize(skb)))
 765                        return -ENOMEM;
 766
 767                nd = (struct nd_msg *)skb_transport_header(skb);
 768                key->ipv6.nd.target = nd->target;
 769
 770                icmp_len -= sizeof(*nd);
 771                offset = 0;
 772                while (icmp_len >= 8) {
 773                        struct nd_opt_hdr *nd_opt =
 774                                 (struct nd_opt_hdr *)(nd->opt + offset);
 775                        int opt_len = nd_opt->nd_opt_len * 8;
 776
 777                        if (unlikely(!opt_len || opt_len > icmp_len))
 778                                return 0;
 779
 780                        /* Store the link layer address if the appropriate
 781                         * option is provided.  It is considered an error if
 782                         * the same link layer option is specified twice.
 783                         */
 784                        if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
 785                            && opt_len == 8) {
 786                                if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
 787                                        goto invalid;
 788                                memcpy(key->ipv6.nd.sll,
 789                                    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
 790                        } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
 791                                   && opt_len == 8) {
 792                                if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
 793                                        goto invalid;
 794                                memcpy(key->ipv6.nd.tll,
 795                                    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
 796                        }
 797
 798                        icmp_len -= opt_len;
 799                        offset += opt_len;
 800                }
 801        }
 802
 803        return 0;
 804
 805invalid:
 806        memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
 807        memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
 808        memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
 809
 810        return 0;
 811}
 812
 813/**
 814 * ovs_flow_extract - extracts a flow key from an Ethernet frame.
 815 * @skb: sk_buff that contains the frame, with skb->data pointing to the
 816 * Ethernet header
 817 * @in_port: port number on which @skb was received.
 818 * @key: output flow key
 819 *
 820 * The caller must ensure that skb->len >= ETH_HLEN.
 821 *
 822 * Returns 0 if successful, otherwise a negative errno value.
 823 *
 824 * Initializes @skb header pointers as follows:
 825 *
 826 *    - skb->mac_header: the Ethernet header.
 827 *
 828 *    - skb->network_header: just past the Ethernet header, or just past the
 829 *      VLAN header, to the first byte of the Ethernet payload.
 830 *
 831 *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 832 *      on output, then just past the IP header, if one is present and
 833 *      of a correct length, otherwise the same as skb->network_header.
 834 *      For other key->eth.type values it is left untouched.
 835 */
 836int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 837{
 838        int error;
 839        struct ethhdr *eth;
 840
 841        memset(key, 0, sizeof(*key));
 842
 843        key->phy.priority = skb->priority;
 844        if (OVS_CB(skb)->tun_key)
 845                memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
 846        key->phy.in_port = in_port;
 847        key->phy.skb_mark = skb->mark;
 848
 849        skb_reset_mac_header(skb);
 850
 851        /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
 852         * header in the linear data area.
 853         */
 854        eth = eth_hdr(skb);
 855        memcpy(key->eth.src, eth->h_source, ETH_ALEN);
 856        memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
 857
 858        __skb_pull(skb, 2 * ETH_ALEN);
 859        /* We are going to push all headers that we pull, so no need to
 860         * update skb->csum here.
 861         */
 862
 863        if (vlan_tx_tag_present(skb))
 864                key->eth.tci = htons(skb->vlan_tci);
 865        else if (eth->h_proto == htons(ETH_P_8021Q))
 866                if (unlikely(parse_vlan(skb, key)))
 867                        return -ENOMEM;
 868
 869        key->eth.type = parse_ethertype(skb);
 870        if (unlikely(key->eth.type == htons(0)))
 871                return -ENOMEM;
 872
 873        skb_reset_network_header(skb);
 874        __skb_push(skb, skb->data - skb_mac_header(skb));
 875
 876        /* Network layer. */
 877        if (key->eth.type == htons(ETH_P_IP)) {
 878                struct iphdr *nh;
 879                __be16 offset;
 880
 881                error = check_iphdr(skb);
 882                if (unlikely(error)) {
 883                        if (error == -EINVAL) {
 884                                skb->transport_header = skb->network_header;
 885                                error = 0;
 886                        }
 887                        return error;
 888                }
 889
 890                nh = ip_hdr(skb);
 891                key->ipv4.addr.src = nh->saddr;
 892                key->ipv4.addr.dst = nh->daddr;
 893
 894                key->ip.proto = nh->protocol;
 895                key->ip.tos = nh->tos;
 896                key->ip.ttl = nh->ttl;
 897
 898                offset = nh->frag_off & htons(IP_OFFSET);
 899                if (offset) {
 900                        key->ip.frag = OVS_FRAG_TYPE_LATER;
 901                        return 0;
 902                }
 903                if (nh->frag_off & htons(IP_MF) ||
 904                         skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 905                        key->ip.frag = OVS_FRAG_TYPE_FIRST;
 906
 907                /* Transport layer. */
 908                if (key->ip.proto == IPPROTO_TCP) {
 909                        if (tcphdr_ok(skb)) {
 910                                struct tcphdr *tcp = tcp_hdr(skb);
 911                                key->ipv4.tp.src = tcp->source;
 912                                key->ipv4.tp.dst = tcp->dest;
 913                        }
 914                } else if (key->ip.proto == IPPROTO_UDP) {
 915                        if (udphdr_ok(skb)) {
 916                                struct udphdr *udp = udp_hdr(skb);
 917                                key->ipv4.tp.src = udp->source;
 918                                key->ipv4.tp.dst = udp->dest;
 919                        }
 920                } else if (key->ip.proto == IPPROTO_SCTP) {
 921                        if (sctphdr_ok(skb)) {
 922                                struct sctphdr *sctp = sctp_hdr(skb);
 923                                key->ipv4.tp.src = sctp->source;
 924                                key->ipv4.tp.dst = sctp->dest;
 925                        }
 926                } else if (key->ip.proto == IPPROTO_ICMP) {
 927                        if (icmphdr_ok(skb)) {
 928                                struct icmphdr *icmp = icmp_hdr(skb);
 929                                /* The ICMP type and code fields use the 16-bit
 930                                 * transport port fields, so we need to store
 931                                 * them in 16-bit network byte order. */
 932                                key->ipv4.tp.src = htons(icmp->type);
 933                                key->ipv4.tp.dst = htons(icmp->code);
 934                        }
 935                }
 936
 937        } else if ((key->eth.type == htons(ETH_P_ARP) ||
 938                   key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
 939                struct arp_eth_header *arp;
 940
 941                arp = (struct arp_eth_header *)skb_network_header(skb);
 942
 943                if (arp->ar_hrd == htons(ARPHRD_ETHER)
 944                                && arp->ar_pro == htons(ETH_P_IP)
 945                                && arp->ar_hln == ETH_ALEN
 946                                && arp->ar_pln == 4) {
 947
 948                        /* We only match on the lower 8 bits of the opcode. */
 949                        if (ntohs(arp->ar_op) <= 0xff)
 950                                key->ip.proto = ntohs(arp->ar_op);
 951                        memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
 952                        memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
 953                        memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
 954                        memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
 955                }
 956        } else if (key->eth.type == htons(ETH_P_IPV6)) {
 957                int nh_len;             /* IPv6 Header + Extensions */
 958
 959                nh_len = parse_ipv6hdr(skb, key);
 960                if (unlikely(nh_len < 0)) {
 961                        if (nh_len == -EINVAL) {
 962                                skb->transport_header = skb->network_header;
 963                                error = 0;
 964                        } else {
 965                                error = nh_len;
 966                        }
 967                        return error;
 968                }
 969
 970                if (key->ip.frag == OVS_FRAG_TYPE_LATER)
 971                        return 0;
 972                if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 973                        key->ip.frag = OVS_FRAG_TYPE_FIRST;
 974
 975                /* Transport layer. */
 976                if (key->ip.proto == NEXTHDR_TCP) {
 977                        if (tcphdr_ok(skb)) {
 978                                struct tcphdr *tcp = tcp_hdr(skb);
 979                                key->ipv6.tp.src = tcp->source;
 980                                key->ipv6.tp.dst = tcp->dest;
 981                        }
 982                } else if (key->ip.proto == NEXTHDR_UDP) {
 983                        if (udphdr_ok(skb)) {
 984                                struct udphdr *udp = udp_hdr(skb);
 985                                key->ipv6.tp.src = udp->source;
 986                                key->ipv6.tp.dst = udp->dest;
 987                        }
 988                } else if (key->ip.proto == NEXTHDR_SCTP) {
 989                        if (sctphdr_ok(skb)) {
 990                                struct sctphdr *sctp = sctp_hdr(skb);
 991                                key->ipv6.tp.src = sctp->source;
 992                                key->ipv6.tp.dst = sctp->dest;
 993                        }
 994                } else if (key->ip.proto == NEXTHDR_ICMP) {
 995                        if (icmp6hdr_ok(skb)) {
 996                                error = parse_icmpv6(skb, key, nh_len);
 997                                if (error)
 998                                        return error;
 999                        }
1000                }
1001        }
1002
1003        return 0;
1004}
1005
1006static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
1007                         int key_end)
1008{
1009        u32 *hash_key = (u32 *)((u8 *)key + key_start);
1010        int hash_u32s = (key_end - key_start) >> 2;
1011
1012        /* Make sure number of hash bytes are multiple of u32. */
1013        BUILD_BUG_ON(sizeof(long) % sizeof(u32));
1014
1015        return jhash2(hash_key, hash_u32s, 0);
1016}
1017
1018static int flow_key_start(const struct sw_flow_key *key)
1019{
1020        if (key->tun_key.ipv4_dst)
1021                return 0;
1022        else
1023                return rounddown(offsetof(struct sw_flow_key, phy),
1024                                          sizeof(long));
1025}
1026
1027static bool __cmp_key(const struct sw_flow_key *key1,
1028                const struct sw_flow_key *key2,  int key_start, int key_end)
1029{
1030        const long *cp1 = (long *)((u8 *)key1 + key_start);
1031        const long *cp2 = (long *)((u8 *)key2 + key_start);
1032        long diffs = 0;
1033        int i;
1034
1035        for (i = key_start; i < key_end;  i += sizeof(long))
1036                diffs |= *cp1++ ^ *cp2++;
1037
1038        return diffs == 0;
1039}
1040
1041static bool __flow_cmp_masked_key(const struct sw_flow *flow,
1042                const struct sw_flow_key *key, int key_start, int key_end)
1043{
1044        return __cmp_key(&flow->key, key, key_start, key_end);
1045}
1046
1047static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
1048                  const struct sw_flow_key *key, int key_start, int key_end)
1049{
1050        return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
1051}
1052
1053bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
1054                const struct sw_flow_key *key, int key_end)
1055{
1056        int key_start;
1057        key_start = flow_key_start(key);
1058
1059        return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
1060
1061}
1062
1063struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
1064                                       struct sw_flow_match *match)
1065{
1066        struct sw_flow_key *unmasked = match->key;
1067        int key_end = match->range.end;
1068        struct sw_flow *flow;
1069
1070        flow = ovs_flow_lookup(table, unmasked);
1071        if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
1072                flow = NULL;
1073
1074        return flow;
1075}
1076
1077static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
1078                                    const struct sw_flow_key *unmasked,
1079                                    struct sw_flow_mask *mask)
1080{
1081        struct sw_flow *flow;
1082        struct hlist_head *head;
1083        int key_start = mask->range.start;
1084        int key_end = mask->range.end;
1085        u32 hash;
1086        struct sw_flow_key masked_key;
1087
1088        ovs_flow_key_mask(&masked_key, unmasked, mask);
1089        hash = ovs_flow_hash(&masked_key, key_start, key_end);
1090        head = find_bucket(table, hash);
1091        hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
1092                if (flow->mask == mask &&
1093                    __flow_cmp_masked_key(flow, &masked_key,
1094                                          key_start, key_end))
1095                        return flow;
1096        }
1097        return NULL;
1098}
1099
1100struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
1101                                const struct sw_flow_key *key)
1102{
1103        struct sw_flow *flow = NULL;
1104        struct sw_flow_mask *mask;
1105
1106        list_for_each_entry_rcu(mask, tbl->mask_list, list) {
1107                flow = ovs_masked_flow_lookup(tbl, key, mask);
1108                if (flow)  /* Found */
1109                        break;
1110        }
1111
1112        return flow;
1113}
1114
1115
1116void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
1117{
1118        flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
1119                        flow->mask->range.end);
1120        __tbl_insert(table, flow);
1121}
1122
1123void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
1124{
1125        BUG_ON(table->count == 0);
1126        hlist_del_rcu(&flow->hash_node[table->node_ver]);
1127        table->count--;
1128}
1129
1130/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
1131const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
1132        [OVS_KEY_ATTR_ENCAP] = -1,
1133        [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
1134        [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
1135        [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
1136        [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
1137        [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
1138        [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
1139        [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
1140        [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
1141        [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
1142        [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
1143        [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
1144        [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
1145        [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
1146        [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
1147        [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
1148        [OVS_KEY_ATTR_TUNNEL] = -1,
1149};
1150
/* Return true when the @size bytes starting at @fp are all zero.
 *
 * A NULL @fp is reported as "not all zero" (false).  A @size of 0 with a
 * non-NULL @fp yields true because no byte is inspected.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
	/* The index has the same type as @size so the comparison is not
	 * mixed-sign and the index cannot overflow before reaching @size.
	 */
	size_t i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++)
		if (fp[i])
			return false;

	return true;
}
1164
1165static int __parse_flow_nlattrs(const struct nlattr *attr,
1166                              const struct nlattr *a[],
1167                              u64 *attrsp, bool nz)
1168{
1169        const struct nlattr *nla;
1170        u32 attrs;
1171        int rem;
1172
1173        attrs = *attrsp;
1174        nla_for_each_nested(nla, attr, rem) {
1175                u16 type = nla_type(nla);
1176                int expected_len;
1177
1178                if (type > OVS_KEY_ATTR_MAX) {
1179                        OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
1180                                  type, OVS_KEY_ATTR_MAX);
1181                        return -EINVAL;
1182                }
1183
1184                if (attrs & (1 << type)) {
1185                        OVS_NLERR("Duplicate key attribute (type %d).\n", type);
1186                        return -EINVAL;
1187                }
1188
1189                expected_len = ovs_key_lens[type];
1190                if (nla_len(nla) != expected_len && expected_len != -1) {
1191                        OVS_NLERR("Key attribute has unexpected length (type=%d"
1192                                  ", length=%d, expected=%d).\n", type,
1193                                  nla_len(nla), expected_len);
1194                        return -EINVAL;
1195                }
1196
1197                if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
1198                        attrs |= 1 << type;
1199                        a[type] = nla;
1200                }
1201        }
1202        if (rem) {
1203                OVS_NLERR("Message has %d unknown bytes.\n", rem);
1204                return -EINVAL;
1205        }
1206
1207        *attrsp = attrs;
1208        return 0;
1209}
1210
1211static int parse_flow_mask_nlattrs(const struct nlattr *attr,
1212                              const struct nlattr *a[], u64 *attrsp)
1213{
1214        return __parse_flow_nlattrs(attr, a, attrsp, true);
1215}
1216
1217static int parse_flow_nlattrs(const struct nlattr *attr,
1218                              const struct nlattr *a[], u64 *attrsp)
1219{
1220        return __parse_flow_nlattrs(attr, a, attrsp, false);
1221}
1222
1223int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
1224                             struct sw_flow_match *match, bool is_mask)
1225{
1226        struct nlattr *a;
1227        int rem;
1228        bool ttl = false;
1229        __be16 tun_flags = 0;
1230
1231        nla_for_each_nested(a, attr, rem) {
1232                int type = nla_type(a);
1233                static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
1234                        [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
1235                        [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
1236                        [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
1237                        [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
1238                        [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
1239                        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
1240                        [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
1241                };
1242
1243                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
1244                        OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
1245                        type, OVS_TUNNEL_KEY_ATTR_MAX);
1246                        return -EINVAL;
1247                }
1248
1249                if (ovs_tunnel_key_lens[type] != nla_len(a)) {
1250                        OVS_NLERR("IPv4 tunnel attribute type has unexpected "
1251                                  " length (type=%d, length=%d, expected=%d).\n",
1252                                  type, nla_len(a), ovs_tunnel_key_lens[type]);
1253                        return -EINVAL;
1254                }
1255
1256                switch (type) {
1257                case OVS_TUNNEL_KEY_ATTR_ID:
1258                        SW_FLOW_KEY_PUT(match, tun_key.tun_id,
1259                                        nla_get_be64(a), is_mask);
1260                        tun_flags |= TUNNEL_KEY;
1261                        break;
1262                case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1263                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
1264                                        nla_get_be32(a), is_mask);
1265                        break;
1266                case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1267                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
1268                                        nla_get_be32(a), is_mask);
1269                        break;
1270                case OVS_TUNNEL_KEY_ATTR_TOS:
1271                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
1272                                        nla_get_u8(a), is_mask);
1273                        break;
1274                case OVS_TUNNEL_KEY_ATTR_TTL:
1275                        SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
1276                                        nla_get_u8(a), is_mask);
1277                        ttl = true;
1278                        break;
1279                case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1280                        tun_flags |= TUNNEL_DONT_FRAGMENT;
1281                        break;
1282                case OVS_TUNNEL_KEY_ATTR_CSUM:
1283                        tun_flags |= TUNNEL_CSUM;
1284                        break;
1285                default:
1286                        return -EINVAL;
1287                }
1288        }
1289
1290        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
1291
1292        if (rem > 0) {
1293                OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
1294                return -EINVAL;
1295        }
1296
1297        if (!is_mask) {
1298                if (!match->key->tun_key.ipv4_dst) {
1299                        OVS_NLERR("IPv4 tunnel destination address is zero.\n");
1300                        return -EINVAL;
1301                }
1302
1303                if (!ttl) {
1304                        OVS_NLERR("IPv4 tunnel TTL not specified.\n");
1305                        return -EINVAL;
1306                }
1307        }
1308
1309        return 0;
1310}
1311
1312int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1313                           const struct ovs_key_ipv4_tunnel *tun_key,
1314                           const struct ovs_key_ipv4_tunnel *output)
1315{
1316        struct nlattr *nla;
1317
1318        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
1319        if (!nla)
1320                return -EMSGSIZE;
1321
1322        if (output->tun_flags & TUNNEL_KEY &&
1323            nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
1324                return -EMSGSIZE;
1325        if (output->ipv4_src &&
1326                nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
1327                return -EMSGSIZE;
1328        if (output->ipv4_dst &&
1329                nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
1330                return -EMSGSIZE;
1331        if (output->ipv4_tos &&
1332                nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
1333                return -EMSGSIZE;
1334        if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
1335                return -EMSGSIZE;
1336        if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
1337                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
1338                return -EMSGSIZE;
1339        if ((output->tun_flags & TUNNEL_CSUM) &&
1340                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
1341                return -EMSGSIZE;
1342
1343        nla_nest_end(skb, nla);
1344        return 0;
1345}
1346
1347static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
1348                const struct nlattr **a, bool is_mask)
1349{
1350        if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1351                SW_FLOW_KEY_PUT(match, phy.priority,
1352                          nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1353                *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1354        }
1355
1356        if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1357                u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1358
1359                if (is_mask)
1360                        in_port = 0xffffffff; /* Always exact match in_port. */
1361                else if (in_port >= DP_MAX_PORTS)
1362                        return -EINVAL;
1363
1364                SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1365                *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1366        } else if (!is_mask) {
1367                SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1368        }
1369
1370        if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1371                uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1372
1373                SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1374                *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1375        }
1376        if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1377                if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1378                                        is_mask))
1379                        return -EINVAL;
1380                *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1381        }
1382        return 0;
1383}
1384
1385static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
1386                const struct nlattr **a, bool is_mask)
1387{
1388        int err;
1389        u64 orig_attrs = attrs;
1390
1391        err = metadata_from_nlattrs(match, &attrs, a, is_mask);
1392        if (err)
1393                return err;
1394
1395        if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1396                const struct ovs_key_ethernet *eth_key;
1397
1398                eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1399                SW_FLOW_KEY_MEMCPY(match, eth.src,
1400                                eth_key->eth_src, ETH_ALEN, is_mask);
1401                SW_FLOW_KEY_MEMCPY(match, eth.dst,
1402                                eth_key->eth_dst, ETH_ALEN, is_mask);
1403                attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1404        }
1405
1406        if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1407                __be16 tci;
1408
1409                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1410                if (!(tci & htons(VLAN_TAG_PRESENT))) {
1411                        if (is_mask)
1412                                OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
1413                        else
1414                                OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
1415
1416                        return -EINVAL;
1417                }
1418
1419                SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
1420                attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1421        } else if (!is_mask)
1422                SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
1423
1424        if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1425                __be16 eth_type;
1426
1427                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1428                if (is_mask) {
1429                        /* Always exact match EtherType. */
1430                        eth_type = htons(0xffff);
1431                } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
1432                        OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
1433                                        ntohs(eth_type), ETH_P_802_3_MIN);
1434                        return -EINVAL;
1435                }
1436
1437                SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1438                attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1439        } else if (!is_mask) {
1440                SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1441        }
1442
1443        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1444                const struct ovs_key_ipv4 *ipv4_key;
1445
1446                ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1447                if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1448                        OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
1449                                ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1450                        return -EINVAL;
1451                }
1452                SW_FLOW_KEY_PUT(match, ip.proto,
1453                                ipv4_key->ipv4_proto, is_mask);
1454                SW_FLOW_KEY_PUT(match, ip.tos,
1455                                ipv4_key->ipv4_tos, is_mask);
1456                SW_FLOW_KEY_PUT(match, ip.ttl,
1457                                ipv4_key->ipv4_ttl, is_mask);
1458                SW_FLOW_KEY_PUT(match, ip.frag,
1459                                ipv4_key->ipv4_frag, is_mask);
1460                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1461                                ipv4_key->ipv4_src, is_mask);
1462                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1463                                ipv4_key->ipv4_dst, is_mask);
1464                attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1465        }
1466
1467        if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1468                const struct ovs_key_ipv6 *ipv6_key;
1469
1470                ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1471                if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1472                        OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
1473                                ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1474                        return -EINVAL;
1475                }
1476                SW_FLOW_KEY_PUT(match, ipv6.label,
1477                                ipv6_key->ipv6_label, is_mask);
1478                SW_FLOW_KEY_PUT(match, ip.proto,
1479                                ipv6_key->ipv6_proto, is_mask);
1480                SW_FLOW_KEY_PUT(match, ip.tos,
1481                                ipv6_key->ipv6_tclass, is_mask);
1482                SW_FLOW_KEY_PUT(match, ip.ttl,
1483                                ipv6_key->ipv6_hlimit, is_mask);
1484                SW_FLOW_KEY_PUT(match, ip.frag,
1485                                ipv6_key->ipv6_frag, is_mask);
1486                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1487                                ipv6_key->ipv6_src,
1488                                sizeof(match->key->ipv6.addr.src),
1489                                is_mask);
1490                SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1491                                ipv6_key->ipv6_dst,
1492                                sizeof(match->key->ipv6.addr.dst),
1493                                is_mask);
1494
1495                attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1496        }
1497
1498        if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1499                const struct ovs_key_arp *arp_key;
1500
1501                arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1502                if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1503                        OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
1504                                  arp_key->arp_op);
1505                        return -EINVAL;
1506                }
1507
1508                SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1509                                arp_key->arp_sip, is_mask);
1510                SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1511                        arp_key->arp_tip, is_mask);
1512                SW_FLOW_KEY_PUT(match, ip.proto,
1513                                ntohs(arp_key->arp_op), is_mask);
1514                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1515                                arp_key->arp_sha, ETH_ALEN, is_mask);
1516                SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1517                                arp_key->arp_tha, ETH_ALEN, is_mask);
1518
1519                attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1520        }
1521
1522        if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1523                const struct ovs_key_tcp *tcp_key;
1524
1525                tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1526                if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1527                        SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1528                                        tcp_key->tcp_src, is_mask);
1529                        SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1530                                        tcp_key->tcp_dst, is_mask);
1531                } else {
1532                        SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1533                                        tcp_key->tcp_src, is_mask);
1534                        SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1535                                        tcp_key->tcp_dst, is_mask);
1536                }
1537                attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1538        }
1539
1540        if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1541                const struct ovs_key_udp *udp_key;
1542
1543                udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1544                if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1545                        SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1546                                        udp_key->udp_src, is_mask);
1547                        SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1548                                        udp_key->udp_dst, is_mask);
1549                } else {
1550                        SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1551                                        udp_key->udp_src, is_mask);
1552                        SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1553                                        udp_key->udp_dst, is_mask);
1554                }
1555                attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1556        }
1557
1558        if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1559                const struct ovs_key_sctp *sctp_key;
1560
1561                sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1562                if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1563                        SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1564                                        sctp_key->sctp_src, is_mask);
1565                        SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1566                                        sctp_key->sctp_dst, is_mask);
1567                } else {
1568                        SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1569                                        sctp_key->sctp_src, is_mask);
1570                        SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1571                                        sctp_key->sctp_dst, is_mask);
1572                }
1573                attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1574        }
1575
1576        if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1577                const struct ovs_key_icmp *icmp_key;
1578
1579                icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1580                SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1581                                htons(icmp_key->icmp_type), is_mask);
1582                SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1583                                htons(icmp_key->icmp_code), is_mask);
1584                attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1585        }
1586
1587        if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1588                const struct ovs_key_icmpv6 *icmpv6_key;
1589
1590                icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1591                SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1592                                htons(icmpv6_key->icmpv6_type), is_mask);
1593                SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1594                                htons(icmpv6_key->icmpv6_code), is_mask);
1595                attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1596        }
1597
1598        if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1599                const struct ovs_key_nd *nd_key;
1600
1601                nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1602                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1603                        nd_key->nd_target,
1604                        sizeof(match->key->ipv6.nd.target),
1605                        is_mask);
1606                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1607                        nd_key->nd_sll, ETH_ALEN, is_mask);
1608                SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1609                                nd_key->nd_tll, ETH_ALEN, is_mask);
1610                attrs &= ~(1 << OVS_KEY_ATTR_ND);
1611        }
1612
1613        if (attrs != 0)
1614                return -EINVAL;
1615
1616        return 0;
1617}
1618
1619/**
1620 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
1621 * mask. In case the 'mask' is NULL, the flow is treated as exact match
1622 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1623 * does not include any don't care bit.
1624 * @match: receives the extracted flow match information.
1625 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1626 * sequence. The fields should of the packet that triggered the creation
1627 * of this flow.
1628 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1629 * attribute specifies the mask field of the wildcarded flow.
1630 */
1631int ovs_match_from_nlattrs(struct sw_flow_match *match,
1632                           const struct nlattr *key,
1633                           const struct nlattr *mask)
1634{
1635        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1636        const struct nlattr *encap;
1637        u64 key_attrs = 0;
1638        u64 mask_attrs = 0;
1639        bool encap_valid = false;
1640        int err;
1641
1642        err = parse_flow_nlattrs(key, a, &key_attrs);
1643        if (err)
1644                return err;
1645
1646        if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
1647            (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
1648            (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
1649                __be16 tci;
1650
1651                if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
1652                      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
1653                        OVS_NLERR("Invalid Vlan frame.\n");
1654                        return -EINVAL;
1655                }
1656
1657                key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1658                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1659                encap = a[OVS_KEY_ATTR_ENCAP];
1660                key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1661                encap_valid = true;
1662
1663                if (tci & htons(VLAN_TAG_PRESENT)) {
1664                        err = parse_flow_nlattrs(encap, a, &key_attrs);
1665                        if (err)
1666                                return err;
1667                } else if (!tci) {
1668                        /* Corner case for truncated 802.1Q header. */
1669                        if (nla_len(encap)) {
1670                                OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
1671                                return -EINVAL;
1672                        }
1673                } else {
1674                        OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
1675                        return  -EINVAL;
1676                }
1677        }
1678
1679        err = ovs_key_from_nlattrs(match, key_attrs, a, false);
1680        if (err)
1681                return err;
1682
1683        if (mask) {
1684                err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
1685                if (err)
1686                        return err;
1687
1688                if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP)  {
1689                        __be16 eth_type = 0;
1690                        __be16 tci = 0;
1691
1692                        if (!encap_valid) {
1693                                OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
1694                                return  -EINVAL;
1695                        }
1696
1697                        mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1698                        if (a[OVS_KEY_ATTR_ETHERTYPE])
1699                                eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1700
1701                        if (eth_type == htons(0xffff)) {
1702                                mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1703                                encap = a[OVS_KEY_ATTR_ENCAP];
1704                                err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
1705                        } else {
1706                                OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
1707                                                ntohs(eth_type));
1708                                return -EINVAL;
1709                        }
1710
1711                        if (a[OVS_KEY_ATTR_VLAN])
1712                                tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1713
1714                        if (!(tci & htons(VLAN_TAG_PRESENT))) {
1715                                OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
1716                                return -EINVAL;
1717                        }
1718                }
1719
1720                err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
1721                if (err)
1722                        return err;
1723        } else {
1724                /* Populate exact match flow's key mask. */
1725                if (match->mask)
1726                        ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
1727        }
1728
1729        if (!ovs_match_validate(match, key_attrs, mask_attrs))
1730                return -EINVAL;
1731
1732        return 0;
1733}
1734
1735/**
1736 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
1737 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
1738 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1739 * sequence.
1740 *
1741 * This parses a series of Netlink attributes that form a flow key, which must
1742 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1743 * get the metadata, that is, the parts of the flow key that cannot be
1744 * extracted from the packet itself.
1745 */
1746
1747int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
1748                const struct nlattr *attr)
1749{
1750        struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
1751        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1752        u64 attrs = 0;
1753        int err;
1754        struct sw_flow_match match;
1755
1756        flow->key.phy.in_port = DP_MAX_PORTS;
1757        flow->key.phy.priority = 0;
1758        flow->key.phy.skb_mark = 0;
1759        memset(tun_key, 0, sizeof(flow->key.tun_key));
1760
1761        err = parse_flow_nlattrs(attr, a, &attrs);
1762        if (err)
1763                return -EINVAL;
1764
1765        memset(&match, 0, sizeof(match));
1766        match.key = &flow->key;
1767
1768        err = metadata_from_nlattrs(&match, &attrs, a, false);
1769        if (err)
1770                return err;
1771
1772        return 0;
1773}
1774
1775int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
1776                const struct sw_flow_key *output, struct sk_buff *skb)
1777{
1778        struct ovs_key_ethernet *eth_key;
1779        struct nlattr *nla, *encap;
1780        bool is_mask = (swkey != output);
1781
1782        if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1783                goto nla_put_failure;
1784
1785        if ((swkey->tun_key.ipv4_dst || is_mask) &&
1786            ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
1787                goto nla_put_failure;
1788
1789        if (swkey->phy.in_port == DP_MAX_PORTS) {
1790                if (is_mask && (output->phy.in_port == 0xffff))
1791                        if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1792                                goto nla_put_failure;
1793        } else {
1794                u16 upper_u16;
1795                upper_u16 = !is_mask ? 0 : 0xffff;
1796
1797                if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1798                                (upper_u16 << 16) | output->phy.in_port))
1799                        goto nla_put_failure;
1800        }
1801
1802        if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1803                goto nla_put_failure;
1804
1805        nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1806        if (!nla)
1807                goto nla_put_failure;
1808
1809        eth_key = nla_data(nla);
1810        memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
1811        memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
1812
1813        if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
1814                __be16 eth_type;
1815                eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
1816                if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1817                    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
1818                        goto nla_put_failure;
1819                encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1820                if (!swkey->eth.tci)
1821                        goto unencap;
1822        } else
1823                encap = NULL;
1824
1825        if (swkey->eth.type == htons(ETH_P_802_2)) {
1826                /*
1827                 * Ethertype 802.2 is represented in the netlink with omitted
1828                 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1829                 * 0xffff in the mask attribute.  Ethertype can also
1830                 * be wildcarded.
1831                 */
1832                if (is_mask && output->eth.type)
1833                        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1834                                                output->eth.type))
1835                                goto nla_put_failure;
1836                goto unencap;
1837        }
1838
1839        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1840                goto nla_put_failure;
1841
1842        if (swkey->eth.type == htons(ETH_P_IP)) {
1843                struct ovs_key_ipv4 *ipv4_key;
1844
1845                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
1846                if (!nla)
1847                        goto nla_put_failure;
1848                ipv4_key = nla_data(nla);
1849                ipv4_key->ipv4_src = output->ipv4.addr.src;
1850                ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1851                ipv4_key->ipv4_proto = output->ip.proto;
1852                ipv4_key->ipv4_tos = output->ip.tos;
1853                ipv4_key->ipv4_ttl = output->ip.ttl;
1854                ipv4_key->ipv4_frag = output->ip.frag;
1855        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1856                struct ovs_key_ipv6 *ipv6_key;
1857
1858                nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
1859                if (!nla)
1860                        goto nla_put_failure;
1861                ipv6_key = nla_data(nla);
1862                memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1863                                sizeof(ipv6_key->ipv6_src));
1864                memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1865                                sizeof(ipv6_key->ipv6_dst));
1866                ipv6_key->ipv6_label = output->ipv6.label;
1867                ipv6_key->ipv6_proto = output->ip.proto;
1868                ipv6_key->ipv6_tclass = output->ip.tos;
1869                ipv6_key->ipv6_hlimit = output->ip.ttl;
1870                ipv6_key->ipv6_frag = output->ip.frag;
1871        } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1872                   swkey->eth.type == htons(ETH_P_RARP)) {
1873                struct ovs_key_arp *arp_key;
1874
1875                nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
1876                if (!nla)
1877                        goto nla_put_failure;
1878                arp_key = nla_data(nla);
1879                memset(arp_key, 0, sizeof(struct ovs_key_arp));
1880                arp_key->arp_sip = output->ipv4.addr.src;
1881                arp_key->arp_tip = output->ipv4.addr.dst;
1882                arp_key->arp_op = htons(output->ip.proto);
1883                memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
1884                memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
1885        }
1886
1887        if ((swkey->eth.type == htons(ETH_P_IP) ||
1888             swkey->eth.type == htons(ETH_P_IPV6)) &&
1889             swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1890
1891                if (swkey->ip.proto == IPPROTO_TCP) {
1892                        struct ovs_key_tcp *tcp_key;
1893
1894                        nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
1895                        if (!nla)
1896                                goto nla_put_failure;
1897                        tcp_key = nla_data(nla);
1898                        if (swkey->eth.type == htons(ETH_P_IP)) {
1899                                tcp_key->tcp_src = output->ipv4.tp.src;
1900                                tcp_key->tcp_dst = output->ipv4.tp.dst;
1901                        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1902                                tcp_key->tcp_src = output->ipv6.tp.src;
1903                                tcp_key->tcp_dst = output->ipv6.tp.dst;
1904                        }
1905                } else if (swkey->ip.proto == IPPROTO_UDP) {
1906                        struct ovs_key_udp *udp_key;
1907
1908                        nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1909                        if (!nla)
1910                                goto nla_put_failure;
1911                        udp_key = nla_data(nla);
1912                        if (swkey->eth.type == htons(ETH_P_IP)) {
1913                                udp_key->udp_src = output->ipv4.tp.src;
1914                                udp_key->udp_dst = output->ipv4.tp.dst;
1915                        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1916                                udp_key->udp_src = output->ipv6.tp.src;
1917                                udp_key->udp_dst = output->ipv6.tp.dst;
1918                        }
1919                } else if (swkey->ip.proto == IPPROTO_SCTP) {
1920                        struct ovs_key_sctp *sctp_key;
1921
1922                        nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1923                        if (!nla)
1924                                goto nla_put_failure;
1925                        sctp_key = nla_data(nla);
1926                        if (swkey->eth.type == htons(ETH_P_IP)) {
1927                                sctp_key->sctp_src = swkey->ipv4.tp.src;
1928                                sctp_key->sctp_dst = swkey->ipv4.tp.dst;
1929                        } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1930                                sctp_key->sctp_src = swkey->ipv6.tp.src;
1931                                sctp_key->sctp_dst = swkey->ipv6.tp.dst;
1932                        }
1933                } else if (swkey->eth.type == htons(ETH_P_IP) &&
1934                           swkey->ip.proto == IPPROTO_ICMP) {
1935                        struct ovs_key_icmp *icmp_key;
1936
1937                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1938                        if (!nla)
1939                                goto nla_put_failure;
1940                        icmp_key = nla_data(nla);
1941                        icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
1942                        icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
1943                } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1944                           swkey->ip.proto == IPPROTO_ICMPV6) {
1945                        struct ovs_key_icmpv6 *icmpv6_key;
1946
1947                        nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1948                                                sizeof(*icmpv6_key));
1949                        if (!nla)
1950                                goto nla_put_failure;
1951                        icmpv6_key = nla_data(nla);
1952                        icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
1953                        icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
1954
1955                        if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1956                            icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1957                                struct ovs_key_nd *nd_key;
1958
1959                                nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1960                                if (!nla)
1961                                        goto nla_put_failure;
1962                                nd_key = nla_data(nla);
1963                                memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1964                                                        sizeof(nd_key->nd_target));
1965                                memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
1966                                memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
1967                        }
1968                }
1969        }
1970
1971unencap:
1972        if (encap)
1973                nla_nest_end(skb, encap);
1974
1975        return 0;
1976
1977nla_put_failure:
1978        return -EMSGSIZE;
1979}
1980
1981/* Initializes the flow module.
1982 * Returns zero if successful or a negative error code. */
1983int ovs_flow_init(void)
1984{
1985        BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
1986        BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
1987
1988        flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
1989                                        0, NULL);
1990        if (flow_cache == NULL)
1991                return -ENOMEM;
1992
1993        return 0;
1994}
1995
/* Uninitializes the flow module by destroying the "sw_flow" slab cache
 * that ovs_flow_init() stored in flow_cache. */
void ovs_flow_exit(void)
{
        kmem_cache_destroy(flow_cache);
}
2001
2002struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
2003{
2004        struct sw_flow_mask *mask;
2005
2006        mask = kmalloc(sizeof(*mask), GFP_KERNEL);
2007        if (mask)
2008                mask->ref_count = 0;
2009
2010        return mask;
2011}
2012
2013void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
2014{
2015        mask->ref_count++;
2016}
2017
2018void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
2019{
2020        if (!mask)
2021                return;
2022
2023        BUG_ON(!mask->ref_count);
2024        mask->ref_count--;
2025
2026        if (!mask->ref_count) {
2027                list_del_rcu(&mask->list);
2028                if (deferred)
2029                        kfree_rcu(mask, rcu);
2030                else
2031                        kfree(mask);
2032        }
2033}
2034
2035static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
2036                const struct sw_flow_mask *b)
2037{
2038        u8 *a_ = (u8 *)&a->key + a->range.start;
2039        u8 *b_ = (u8 *)&b->key + b->range.start;
2040
2041        return  (a->range.end == b->range.end)
2042                && (a->range.start == b->range.start)
2043                && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
2044}
2045
2046struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
2047                                           const struct sw_flow_mask *mask)
2048{
2049        struct list_head *ml;
2050
2051        list_for_each(ml, tbl->mask_list) {
2052                struct sw_flow_mask *m;
2053                m = container_of(ml, struct sw_flow_mask, list);
2054                if (ovs_sw_flow_mask_equal(mask, m))
2055                        return m;
2056        }
2057
2058        return NULL;
2059}
2060
/**
 * Add a new mask to the mask list of 'tbl'.
 * The mask is linked into tbl->mask_list with list_add_rcu().
 * The caller needs to make sure that 'mask' is not the same
 * as any masks that are already on the list.
 */
void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
{
        list_add_rcu(&mask->list, tbl->mask_list);
}
2070
2071/**
2072 * Set 'range' fields in the mask to the value of 'val'.
2073 */
2074static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
2075                struct sw_flow_key_range *range, u8 val)
2076{
2077        u8 *m = (u8 *)&mask->key + range->start;
2078
2079        mask->range = *range;
2080        memset(m, val, range_n_bytes(range));
2081}
2082