linux/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
<<
>>
Prefs
   1/*
   2 * Copyright(c) 2017 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48/*
  49 * This file contains OPA VNIC encapsulation/decapsulation function.
  50 */
  51
  52#include <linux/if_ether.h>
  53#include <linux/if_vlan.h>
  54
  55#include "opa_vnic_internal.h"
  56
/*
 * OPA 16B Header fields
 *
 * Masks and shifts used by opa_vnic_make_header() to place each field in
 * the 32-bit words h[0]..h[4] of the header.
 */
/* lower 20 bits of a LID: SLID goes to h[0], DLID to h[1] */
#define OPA_16B_LID_MASK        0xFFFFFull
/* LID bits above the lower 20 are stored in h[2]: SLID in bits 8-11 ... */
#define OPA_16B_SLID_HIGH_SHFT  8
#define OPA_16B_SLID_MASK       0xF00ull
/* ... and DLID in bits 12-15 */
#define OPA_16B_DLID_MASK       0xF000ull
#define OPA_16B_DLID_HIGH_SHFT  12
/* packet length field starts at bit 20 of h[0] */
#define OPA_16B_LEN_SHFT        20
/* service class starts at bit 20 of h[1] */
#define OPA_16B_SC_SHFT         20
/* routing control starts at bit 25 of h[1] */
#define OPA_16B_RC_SHFT         25
/* partition key starts at bit 16 of h[2] */
#define OPA_16B_PKEY_SHFT       16

/* L4 header value starts at bit 16 of h[4] */
#define OPA_VNIC_L4_HDR_SHFT    16

/*
 * L2+L4 hdr len is 20 bytes, i.e. 5 32-bit words. Despite the "QW" in the
 * name, the unit is a 32-bit word: the header is built as an array of
 * this many u32 elements.
 */
#define OPA_VNIC_HDR_QW_LEN   5
  72
  73static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
  74                                        u16 pkey, u16 entropy, u8 sc, u8 rc,
  75                                        u8 l4_type, u16 l4_hdr)
  76{
  77        /* h[1]: LT=1, 16B L2=10 */
  78        u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
  79
  80        h[2] = l4_type;
  81        h[3] = entropy;
  82        h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT;
  83
  84        /* Extract and set 4 upper bits and 20 lower bits of the lids */
  85        h[0] |= (slid & OPA_16B_LID_MASK);
  86        h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
  87
  88        h[1] |= (dlid & OPA_16B_LID_MASK);
  89        h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
  90
  91        h[0] |= (len << OPA_16B_LEN_SHFT);
  92        h[1] |= (rc << OPA_16B_RC_SHFT);
  93        h[1] |= (sc << OPA_16B_SC_SHFT);
  94        h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
  95
  96        memcpy(hdr, h, OPA_VNIC_HDR_LEN);
  97}
  98
  99/*
 100 * Using a simple hash table for mac table implementation with the last octet
 101 * of mac address as a key.
 102 */
 103static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
 104{
 105        struct opa_vnic_mac_tbl_node *node;
 106        struct hlist_node *tmp;
 107        int bkt;
 108
 109        if (!mactbl)
 110                return;
 111
 112        vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
 113                hash_del(&node->hlist);
 114                kfree(node);
 115        }
 116        kfree(mactbl);
 117}
 118
 119static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
 120{
 121        u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
 122        struct hlist_head *mactbl;
 123
 124        mactbl = kzalloc(size, GFP_KERNEL);
 125        if (!mactbl)
 126                return ERR_PTR(-ENOMEM);
 127
 128        vnic_hash_init(mactbl);
 129        return mactbl;
 130}
 131
 132/* opa_vnic_release_mac_tbl - empty and free the mac table */
 133void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
 134{
 135        struct hlist_head *mactbl;
 136
 137        mutex_lock(&adapter->mactbl_lock);
 138        mactbl = rcu_access_pointer(adapter->mactbl);
 139        rcu_assign_pointer(adapter->mactbl, NULL);
 140        synchronize_rcu();
 141        opa_vnic_free_mac_tbl(mactbl);
 142        adapter->info.vport.mac_tbl_digest = 0;
 143        mutex_unlock(&adapter->mactbl_lock);
 144}
 145
 146/*
 147 * opa_vnic_query_mac_tbl - query the mac table for a section
 148 *
 149 * This function implements query of specific function of the mac table.
 150 * The function also expects the requested range to be valid.
 151 */
 152void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
 153                            struct opa_veswport_mactable *tbl)
 154{
 155        struct opa_vnic_mac_tbl_node *node;
 156        struct hlist_head *mactbl;
 157        int bkt;
 158        u16 loffset, lnum_entries;
 159
 160        rcu_read_lock();
 161        mactbl = rcu_dereference(adapter->mactbl);
 162        if (!mactbl)
 163                goto get_mac_done;
 164
 165        loffset = be16_to_cpu(tbl->offset);
 166        lnum_entries = be16_to_cpu(tbl->num_entries);
 167
 168        vnic_hash_for_each(mactbl, bkt, node, hlist) {
 169                struct __opa_vnic_mactable_entry *nentry = &node->entry;
 170                struct opa_veswport_mactable_entry *entry;
 171
 172                if ((node->index < loffset) ||
 173                    (node->index >= (loffset + lnum_entries)))
 174                        continue;
 175
 176                /* populate entry in the tbl corresponding to the index */
 177                entry = &tbl->tbl_entries[node->index - loffset];
 178                memcpy(entry->mac_addr, nentry->mac_addr,
 179                       ARRAY_SIZE(entry->mac_addr));
 180                memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
 181                       ARRAY_SIZE(entry->mac_addr_mask));
 182                entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
 183        }
 184        tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
 185get_mac_done:
 186        rcu_read_unlock();
 187}
 188
 189/*
 190 * opa_vnic_update_mac_tbl - update mac table section
 191 *
 192 * This function updates the specified section of the mac table.
 193 * The procedure includes following steps.
 194 *  - Allocate a new mac (hash) table.
 195 *  - Add the specified entries to the new table.
 196 *    (except the ones that are requested to be deleted).
 197 *  - Add all the other entries from the old mac table.
 198 *  - If there is a failure, free the new table and return.
 199 *  - Switch to the new table.
 200 *  - Free the old table and return.
 201 *
 202 * The function also expects the requested range to be valid.
 203 */
 204int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
 205                            struct opa_veswport_mactable *tbl)
 206{
 207        struct opa_vnic_mac_tbl_node *node, *new_node;
 208        struct hlist_head *new_mactbl, *old_mactbl;
 209        int i, bkt, rc = 0;
 210        u8 key;
 211        u16 loffset, lnum_entries;
 212
 213        mutex_lock(&adapter->mactbl_lock);
 214        /* allocate new mac table */
 215        new_mactbl = opa_vnic_alloc_mac_tbl();
 216        if (IS_ERR(new_mactbl)) {
 217                mutex_unlock(&adapter->mactbl_lock);
 218                return PTR_ERR(new_mactbl);
 219        }
 220
 221        loffset = be16_to_cpu(tbl->offset);
 222        lnum_entries = be16_to_cpu(tbl->num_entries);
 223
 224        /* add updated entries to the new mac table */
 225        for (i = 0; i < lnum_entries; i++) {
 226                struct __opa_vnic_mactable_entry *nentry;
 227                struct opa_veswport_mactable_entry *entry =
 228                                                        &tbl->tbl_entries[i];
 229                u8 *mac_addr = entry->mac_addr;
 230                u8 empty_mac[ETH_ALEN] = { 0 };
 231
 232                v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
 233                      loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
 234                      mac_addr[3], mac_addr[4], mac_addr[5],
 235                      entry->dlid_sd);
 236
 237                /* if the entry is being removed, do not add it */
 238                if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
 239                        continue;
 240
 241                node = kzalloc(sizeof(*node), GFP_KERNEL);
 242                if (!node) {
 243                        rc = -ENOMEM;
 244                        goto updt_done;
 245                }
 246
 247                node->index = loffset + i;
 248                nentry = &node->entry;
 249                memcpy(nentry->mac_addr, entry->mac_addr,
 250                       ARRAY_SIZE(nentry->mac_addr));
 251                memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
 252                       ARRAY_SIZE(nentry->mac_addr_mask));
 253                nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
 254                key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
 255                vnic_hash_add(new_mactbl, &node->hlist, key);
 256        }
 257
 258        /* add other entries from current mac table to new mac table */
 259        old_mactbl = rcu_access_pointer(adapter->mactbl);
 260        if (!old_mactbl)
 261                goto switch_tbl;
 262
 263        vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
 264                if ((node->index >= loffset) &&
 265                    (node->index < (loffset + lnum_entries)))
 266                        continue;
 267
 268                new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
 269                if (!new_node) {
 270                        rc = -ENOMEM;
 271                        goto updt_done;
 272                }
 273
 274                new_node->index = node->index;
 275                memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
 276                key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
 277                vnic_hash_add(new_mactbl, &new_node->hlist, key);
 278        }
 279
 280switch_tbl:
 281        /* switch to new table */
 282        rcu_assign_pointer(adapter->mactbl, new_mactbl);
 283        synchronize_rcu();
 284
 285        adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
 286updt_done:
 287        /* upon failure, free the new table; otherwise, free the old table */
 288        if (rc)
 289                opa_vnic_free_mac_tbl(new_mactbl);
 290        else
 291                opa_vnic_free_mac_tbl(old_mactbl);
 292
 293        mutex_unlock(&adapter->mactbl_lock);
 294        return rc;
 295}
 296
 297/* opa_vnic_chk_mac_tbl - check mac table for dlid */
 298static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
 299                                     struct ethhdr *mac_hdr)
 300{
 301        struct opa_vnic_mac_tbl_node *node;
 302        struct hlist_head *mactbl;
 303        u32 dlid = 0;
 304        u8 key;
 305
 306        rcu_read_lock();
 307        mactbl = rcu_dereference(adapter->mactbl);
 308        if (unlikely(!mactbl))
 309                goto chk_done;
 310
 311        key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
 312        vnic_hash_for_each_possible(mactbl, node, hlist, key) {
 313                struct __opa_vnic_mactable_entry *entry = &node->entry;
 314
 315                /* if related to source mac, skip */
 316                if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
 317                        continue;
 318
 319                if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
 320                            ARRAY_SIZE(node->entry.mac_addr))) {
 321                        /* mac address found */
 322                        dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
 323                        break;
 324                }
 325        }
 326
 327chk_done:
 328        rcu_read_unlock();
 329        return dlid;
 330}
 331
 332/* opa_vnic_get_dlid - find and return the DLID */
 333static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
 334                                  struct sk_buff *skb, u8 def_port)
 335{
 336        struct __opa_veswport_info *info = &adapter->info;
 337        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
 338        u32 dlid;
 339
 340        dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
 341        if (dlid)
 342                return dlid;
 343
 344        if (is_multicast_ether_addr(mac_hdr->h_dest)) {
 345                dlid = info->vesw.u_mcast_dlid;
 346        } else {
 347                if (is_local_ether_addr(mac_hdr->h_dest)) {
 348                        dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
 349                                ((uint32_t)mac_hdr->h_dest[4] << 8)  |
 350                                mac_hdr->h_dest[3];
 351                        if (unlikely(!dlid))
 352                                v_warn("Null dlid in MAC address\n");
 353                } else if (def_port != OPA_VNIC_INVALID_PORT) {
 354                        if (def_port < OPA_VESW_MAX_NUM_DEF_PORT)
 355                                dlid = info->vesw.u_ucast_dlid[def_port];
 356                }
 357        }
 358
 359        return dlid;
 360}
 361
 362/* opa_vnic_get_sc - return the service class */
 363static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
 364                          struct sk_buff *skb)
 365{
 366        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
 367        u16 vlan_tci;
 368        u8 sc;
 369
 370        if (!__vlan_get_tag(skb, &vlan_tci)) {
 371                u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
 372
 373                if (is_multicast_ether_addr(mac_hdr->h_dest))
 374                        sc = info->vport.pcp_to_sc_mc[pcp];
 375                else
 376                        sc = info->vport.pcp_to_sc_uc[pcp];
 377        } else {
 378                if (is_multicast_ether_addr(mac_hdr->h_dest))
 379                        sc = info->vport.non_vlan_sc_mc;
 380                else
 381                        sc = info->vport.non_vlan_sc_uc;
 382        }
 383
 384        return sc;
 385}
 386
 387u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
 388{
 389        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
 390        struct __opa_veswport_info *info = &adapter->info;
 391        u8 vl;
 392
 393        if (skb_vlan_tag_present(skb)) {
 394                u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
 395
 396                if (is_multicast_ether_addr(mac_hdr->h_dest))
 397                        vl = info->vport.pcp_to_vl_mc[pcp];
 398                else
 399                        vl = info->vport.pcp_to_vl_uc[pcp];
 400        } else {
 401                if (is_multicast_ether_addr(mac_hdr->h_dest))
 402                        vl = info->vport.non_vlan_vl_mc;
 403                else
 404                        vl = info->vport.non_vlan_vl_uc;
 405        }
 406
 407        return vl;
 408}
 409
 410/* opa_vnic_get_rc - return the routing control */
 411static u8 opa_vnic_get_rc(struct __opa_veswport_info *info,
 412                          struct sk_buff *skb)
 413{
 414        u8 proto, rout_ctrl;
 415
 416        switch (vlan_get_protocol(skb)) {
 417        case htons(ETH_P_IPV6):
 418                proto = ipv6_hdr(skb)->nexthdr;
 419                if (proto == IPPROTO_TCP)
 420                        rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc,
 421                                                          IPV6_TCP);
 422                else if (proto == IPPROTO_UDP)
 423                        rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc,
 424                                                          IPV6_UDP);
 425                else
 426                        rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV6);
 427                break;
 428        case htons(ETH_P_IP):
 429                proto = ip_hdr(skb)->protocol;
 430                if (proto == IPPROTO_TCP)
 431                        rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc,
 432                                                          IPV4_TCP);
 433                else if (proto == IPPROTO_UDP)
 434                        rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc,
 435                                                          IPV4_UDP);
 436                else
 437                        rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV4);
 438                break;
 439        default:
 440                rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, DEFAULT);
 441        }
 442
 443        return rout_ctrl;
 444}
 445
 446/* opa_vnic_calc_entropy - calculate the packet entropy */
 447u8 opa_vnic_calc_entropy(struct sk_buff *skb)
 448{
 449        u32 hash = skb_get_hash(skb);
 450
 451        /* store XOR of all bytes in lower 8 bits */
 452        hash ^= hash >> 8;
 453        hash ^= hash >> 16;
 454
 455        /* return lower 8 bits as entropy */
 456        return (u8)(hash & 0xFF);
 457}
 458
 459/* opa_vnic_get_def_port - get default port based on entropy */
 460static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
 461                                       u8 entropy)
 462{
 463        u8 flow_id;
 464
 465        /* Add the upper and lower 4-bits of entropy to get the flow id */
 466        flow_id = ((entropy & 0xf) + (entropy >> 4));
 467        return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
 468}
 469
 470/* Calculate packet length including OPA header, crc and padding */
 471static inline int opa_vnic_wire_length(struct sk_buff *skb)
 472{
 473        u32 pad_len;
 474
 475        /* padding for 8 bytes size alignment */
 476        pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
 477        pad_len += OPA_VNIC_ICRC_TAIL_LEN;
 478
 479        return (skb->len + pad_len) >> 3;
 480}
 481
 482/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
 483void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
 484{
 485        struct __opa_veswport_info *info = &adapter->info;
 486        struct opa_vnic_skb_mdata *mdata;
 487        u8 def_port, sc, rc, entropy, *hdr;
 488        u16 len, l4_hdr;
 489        u32 dlid;
 490
 491        hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
 492
 493        entropy = opa_vnic_calc_entropy(skb);
 494        def_port = opa_vnic_get_def_port(adapter, entropy);
 495        len = opa_vnic_wire_length(skb);
 496        dlid = opa_vnic_get_dlid(adapter, skb, def_port);
 497        sc = opa_vnic_get_sc(info, skb);
 498        rc = opa_vnic_get_rc(info, skb);
 499        l4_hdr = info->vesw.vesw_id;
 500
 501        mdata = skb_push(skb, sizeof(*mdata));
 502        mdata->vl = opa_vnic_get_vl(adapter, skb);
 503        mdata->entropy = entropy;
 504        mdata->flags = 0;
 505        if (unlikely(!dlid)) {
 506                mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
 507                return;
 508        }
 509
 510        opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
 511                             info->vesw.pkey, entropy, sc, rc,
 512                             OPA_VNIC_L4_ETHR, l4_hdr);
 513}
 514