linux/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
<<
>>
Prefs
   1/*
   2 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager
   3 *
   4 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 *
  34 * Written by: Karen Xie (kxie@chelsio.com)
  35 */
  36
  /* Driver identification strings.  DRV_VERSION is passed to
   * MODULE_VERSION() at the bottom of this file.
   */
  37#define DRV_NAME "libcxgb"
  38#define DRV_VERSION "1.0.0-ko"
  /* pr_fmt() prepends DRV_NAME and ": " to a format string.
   * NOTE(review): presumably defined ahead of the #include lines so that the
   * pr_*() helpers pick it up -- confirm against the printk documentation.
   */
  39#define pr_fmt(fmt) DRV_NAME ": " fmt
  40
  41#include <linux/kernel.h>
  42#include <linux/module.h>
  43#include <linux/errno.h>
  44#include <linux/types.h>
  45#include <linux/debugfs.h>
  46#include <linux/export.h>
  47#include <linux/list.h>
  48#include <linux/skbuff.h>
  49#include <linux/pci.h>
  50#include <linux/scatterlist.h>
  51
  52#include "libcxgb_ppm.h"
  53
  54/* Direct Data Placement -
  55 * Directly place the iSCSI Data-In or Data-Out PDU's payload into
  56 * pre-posted final destination host-memory buffers based on the
  57 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
  58 * in Data-Out PDUs. The host memory address is programmed into
  59 * h/w in the format of pagepod entries. The location of the
  60 * pagepod entry is encoded into ddp tag which is used as the base
  61 * for ITT/TTT.
  62 */
  63
  64/* Direct-Data Placement page size adjustment
  65 */
  66int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
  67{
  68        struct cxgbi_tag_format *tformat = &ppm->tformat;
  69        int i;
  70
  71        for (i = 0; i < DDP_PGIDX_MAX; i++) {
  72                if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
  73                                         tformat->pgsz_order[i])) {
  74                        pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
  75                                 __func__, ppm->ndev->name, pgsz, i);
  76                        return i;
  77                }
  78        }
  79        pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
  80        return DDP_PGIDX_MAX;
  81}
  82
  83/* DDP setup & teardown
  84 */
  85static int ppm_find_unused_entries(unsigned long *bmap,
  86                                   unsigned int max_ppods,
  87                                   unsigned int start,
  88                                   unsigned int nr,
  89                                   unsigned int align_mask)
  90{
  91        unsigned long i;
  92
  93        i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);
  94
  95        if (unlikely(i >= max_ppods) && (start > nr))
  96                i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
  97                                               align_mask);
  98        if (unlikely(i >= max_ppods))
  99                return -ENOSPC;
 100
 101        bitmap_set(bmap, i, nr);
 102        return (int)i;
 103}
 104
 105static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
 106                             unsigned long caller_data)
 107{
 108        struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;
 109
 110        pdata->caller_data = caller_data;
 111        pdata->npods = count;
 112
 113        if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
 114                pdata->color = 0;
 115        else
 116                pdata->color++;
 117}
 118
 119static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
 120                               unsigned long caller_data)
 121{
 122        struct cxgbi_ppm_pool *pool;
 123        unsigned int cpu;
 124        int i;
 125
 126        if (!ppm->pool)
 127                return -EINVAL;
 128
 129        cpu = get_cpu();
 130        pool = per_cpu_ptr(ppm->pool, cpu);
 131        spin_lock_bh(&pool->lock);
 132        put_cpu();
 133
 134        i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
 135                                    pool->next, count, 0);
 136        if (i < 0) {
 137                pool->next = 0;
 138                spin_unlock_bh(&pool->lock);
 139                return -ENOSPC;
 140        }
 141
 142        pool->next = i + count;
 143        if (pool->next >= ppm->pool_index_max)
 144                pool->next = 0;
 145
 146        spin_unlock_bh(&pool->lock);
 147
 148        pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
 149                 __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
 150                pool->next);
 151
 152        i += cpu * ppm->pool_index_max;
 153        ppm_mark_entries(ppm, i, count, caller_data);
 154
 155        return i;
 156}
 157
 158static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
 159                           unsigned long caller_data)
 160{
 161        int i;
 162
 163        spin_lock_bh(&ppm->map_lock);
 164        i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
 165                                    ppm->next, count, 0);
 166        if (i < 0) {
 167                ppm->next = 0;
 168                spin_unlock_bh(&ppm->map_lock);
 169                pr_debug("ippm: NO suitable entries %u available.\n",
 170                         count);
 171                return -ENOSPC;
 172        }
 173
 174        ppm->next = i + count;
 175        if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram))
 176                ppm->next = 0;
 177        else if (ppm->next >= ppm->bmap_index_max)
 178                ppm->next = 0;
 179
 180        spin_unlock_bh(&ppm->map_lock);
 181
 182        pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
 183                 __func__, i, count, i + ppm->pool_rsvd, ppm->next,
 184                 caller_data);
 185
 186        i += ppm->pool_rsvd;
 187        ppm_mark_entries(ppm, i, count, caller_data);
 188
 189        return i;
 190}
 191
 192static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
 193{
 194        pr_debug("%s: idx %d + %d.\n", __func__, i, count);
 195
 196        if (i < ppm->pool_rsvd) {
 197                unsigned int cpu;
 198                struct cxgbi_ppm_pool *pool;
 199
 200                cpu = i / ppm->pool_index_max;
 201                i %= ppm->pool_index_max;
 202
 203                pool = per_cpu_ptr(ppm->pool, cpu);
 204                spin_lock_bh(&pool->lock);
 205                bitmap_clear(pool->bmap, i, count);
 206
 207                if (i < pool->next)
 208                        pool->next = i;
 209                spin_unlock_bh(&pool->lock);
 210
 211                pr_debug("%s: cpu %u, idx %d, next %u.\n",
 212                         __func__, cpu, i, pool->next);
 213        } else {
 214                spin_lock_bh(&ppm->map_lock);
 215
 216                i -= ppm->pool_rsvd;
 217                bitmap_clear(ppm->ppod_bmap, i, count);
 218
 219                if (i < ppm->next)
 220                        ppm->next = i;
 221                spin_unlock_bh(&ppm->map_lock);
 222
 223                pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
 224        }
 225}
 226
 227void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
 228{
 229        struct cxgbi_ppod_data *pdata;
 230
 231        if (idx >= ppm->ppmax) {
 232                pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
 233                return;
 234        }
 235
 236        pdata = ppm->ppod_data + idx;
 237        if (!pdata->npods) {
 238                pr_warn("ippm: idx %u, npods 0.\n", idx);
 239                return;
 240        }
 241
 242        pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
 243        ppm_unmark_entries(ppm, idx, pdata->npods);
 244}
 245EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
 246
 247int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
 248                            u32 per_tag_pg_idx, u32 *ppod_idx,
 249                            u32 *ddp_tag, unsigned long caller_data)
 250{
 251        struct cxgbi_ppod_data *pdata;
 252        unsigned int npods;
 253        int idx = -1;
 254        unsigned int hwidx;
 255        u32 tag;
 256
 257        npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
 258        if (!npods) {
 259                pr_warn("%s: pages %u -> npods %u, full.\n",
 260                        __func__, nr_pages, npods);
 261                return -EINVAL;
 262        }
 263
 264        /* grab from cpu pool first */
 265        idx = ppm_get_cpu_entries(ppm, npods, caller_data);
 266        /* try the general pool */
 267        if (idx < 0)
 268                idx = ppm_get_entries(ppm, npods, caller_data);
 269        if (idx < 0) {
 270                pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
 271                         nr_pages, npods, ppm->next, caller_data);
 272                return idx;
 273        }
 274
 275        pdata = ppm->ppod_data + idx;
 276        hwidx = ppm->base_idx + idx;
 277
 278        tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);
 279
 280        if (per_tag_pg_idx)
 281                tag |= (per_tag_pg_idx << 30) & 0xC0000000;
 282
 283        *ppod_idx = idx;
 284        *ddp_tag = tag;
 285
 286        pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
 287                 nr_pages, tag, idx, npods, caller_data);
 288
 289        return npods;
 290}
 291EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
 292
 293void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
 294                             unsigned int tid, unsigned int offset,
 295                             unsigned int length,
 296                             struct cxgbi_pagepod_hdr *hdr)
 297{
 298        /* The ddp tag in pagepod should be with bit 31:30 set to 0.
 299         * The ddp Tag on the wire should be with non-zero 31:30 to the peer
 300         */
 301        tag &= 0x3FFFFFFF;
 302
 303        hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
 304
 305        hdr->rsvd = 0;
 306        hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
 307        hdr->max_offset = htonl(length);
 308        hdr->page_offset = htonl(offset);
 309
 310        pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
 311                 tag, tid, length, offset);
 312}
 313EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
 314
 315static void ppm_free(struct cxgbi_ppm *ppm)
 316{
 317        vfree(ppm);
 318}
 319
 320static void ppm_destroy(struct kref *kref)
 321{
 322        struct cxgbi_ppm *ppm = container_of(kref,
 323                                             struct cxgbi_ppm,
 324                                             refcnt);
 325        pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
 326                ppm->ndev->name, ppm);
 327
 328        *ppm->ppm_pp = NULL;
 329
 330        free_percpu(ppm->pool);
 331        ppm_free(ppm);
 332}
 333
 334int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
 335{
 336        if (ppm) {
 337                int rv;
 338
 339                rv = kref_put(&ppm->refcnt, ppm_destroy);
 340                return rv;
 341        }
 342        return 1;
 343}
 344EXPORT_SYMBOL(cxgbi_ppm_release);
 345
 346static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
 347                                                 unsigned int *pcpu_ppmax)
 348{
 349        struct cxgbi_ppm_pool *pools;
 350        unsigned int ppmax = (*total) / num_possible_cpus();
 351        unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
 352        unsigned int bmap;
 353        unsigned int alloc_sz;
 354        unsigned int count = 0;
 355        unsigned int cpu;
 356
 357        /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
 358        if (ppmax > max)
 359                ppmax = max;
 360
 361        /* pool size must be multiple of unsigned long */
 362        bmap = ppmax / BITS_PER_TYPE(unsigned long);
 363        if (!bmap)
 364                return NULL;
 365
 366        ppmax = (bmap * sizeof(unsigned long)) << 3;
 367
 368        alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
 369        pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));
 370
 371        if (!pools)
 372                return NULL;
 373
 374        for_each_possible_cpu(cpu) {
 375                struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);
 376
 377                memset(ppool, 0, alloc_sz);
 378                spin_lock_init(&ppool->lock);
 379                count += ppmax;
 380        }
 381
 382        *total = count;
 383        *pcpu_ppmax = ppmax;
 384
 385        return pools;
 386}
 387
 388int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
 389                   struct pci_dev *pdev, void *lldev,
 390                   struct cxgbi_tag_format *tformat, unsigned int iscsi_size,
 391                   unsigned int llimit, unsigned int start,
 392                   unsigned int reserve_factor, unsigned int iscsi_edram_start,
 393                   unsigned int iscsi_edram_size)
 394{
 395        struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
 396        struct cxgbi_ppm_pool *pool = NULL;
 397        unsigned int pool_index_max = 0;
 398        unsigned int ppmax_pool = 0;
 399        unsigned int ppod_bmap_size;
 400        unsigned int alloc_sz;
 401        unsigned int ppmax;
 402
 403        if (!iscsi_edram_start)
 404                iscsi_edram_size = 0;
 405
 406        if (iscsi_edram_size &&
 407            ((iscsi_edram_start + iscsi_edram_size) != start)) {
 408                pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x "
 409                        "size 0x%x DDR start 0x%x\n",
 410                        iscsi_edram_start, iscsi_edram_size, start);
 411                return -EINVAL;
 412        }
 413
 414        if (iscsi_edram_size) {
 415                reserve_factor = 0;
 416                start = iscsi_edram_start;
 417        }
 418
 419        ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT;
 420
 421        if (ppm) {
 422                pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
 423                        ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
 424                kref_get(&ppm->refcnt);
 425                return 1;
 426        }
 427
 428        if (reserve_factor) {
 429                ppmax_pool = ppmax / reserve_factor;
 430                pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
 431                if (!pool) {
 432                        ppmax_pool = 0;
 433                        reserve_factor = 0;
 434                }
 435
 436                pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
 437                         ndev->name, ppmax, ppmax_pool, pool_index_max);
 438        }
 439
 440        ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
 441        alloc_sz = sizeof(struct cxgbi_ppm) +
 442                        ppmax * (sizeof(struct cxgbi_ppod_data)) +
 443                        ppod_bmap_size * sizeof(unsigned long);
 444
 445        ppm = vzalloc(alloc_sz);
 446        if (!ppm)
 447                goto release_ppm_pool;
 448
 449        ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);
 450
 451        if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
 452                unsigned int start = ppmax - ppmax_pool;
 453                unsigned int end = ppod_bmap_size >> 3;
 454
 455                bitmap_set(ppm->ppod_bmap, ppmax, end - start);
 456                pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
 457                        __func__, ppmax, ppmax_pool, ppod_bmap_size, start,
 458                        end);
 459        }
 460        if (iscsi_edram_size) {
 461                unsigned int first_ddr_idx =
 462                                iscsi_edram_size >> PPOD_SIZE_SHIFT;
 463
 464                ppm->max_index_in_edram = first_ddr_idx - 1;
 465                bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1);
 466                pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx);
 467        }
 468
 469        spin_lock_init(&ppm->map_lock);
 470        kref_init(&ppm->refcnt);
 471
 472        memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));
 473
 474        ppm->ppm_pp = ppm_pp;
 475        ppm->ndev = ndev;
 476        ppm->pdev = pdev;
 477        ppm->lldev = lldev;
 478        ppm->ppmax = ppmax;
 479        ppm->next = 0;
 480        ppm->llimit = llimit;
 481        ppm->base_idx = start > llimit ?
 482                        (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
 483        ppm->bmap_index_max = ppmax - ppmax_pool;
 484
 485        ppm->pool = pool;
 486        ppm->pool_rsvd = ppmax_pool;
 487        ppm->pool_index_max = pool_index_max;
 488
 489        /* check one more time */
 490        if (*ppm_pp) {
 491                ppm_free(ppm);
 492                ppm = (struct cxgbi_ppm *)(*ppm_pp);
 493
 494                pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
 495                        ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);
 496
 497                kref_get(&ppm->refcnt);
 498                return 1;
 499        }
 500        *ppm_pp = ppm;
 501
 502        ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);
 503
 504        pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
 505                ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
 506                ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
 507                ppm->pool_index_max);
 508
 509        return 0;
 510
 511release_ppm_pool:
 512        free_percpu(pool);
 513        return -ENOMEM;
 514}
 515EXPORT_SYMBOL(cxgbi_ppm_init);
 516
 517unsigned int cxgbi_tagmask_set(unsigned int ppmax)
 518{
 519        unsigned int bits = fls(ppmax);
 520
 521        if (bits > PPOD_IDX_MAX_SIZE)
 522                bits = PPOD_IDX_MAX_SIZE;
 523
 524        pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
 525                ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));
 526
 527        return 1 << (bits + PPOD_IDX_SHIFT);
 528}
 529EXPORT_SYMBOL(cxgbi_tagmask_set);
 530
 /* Module metadata.  The version string is DRV_VERSION, defined at the top
  * of this file.
  */
 531MODULE_AUTHOR("Chelsio Communications");
 532MODULE_DESCRIPTION("Chelsio common library");
 533MODULE_VERSION(DRV_VERSION);
 534MODULE_LICENSE("Dual BSD/GPL");
 535