linux/drivers/infiniband/hw/cxgb4/device.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/moduleparam.h>
  34#include <linux/debugfs.h>
  35#include <linux/vmalloc.h>
  36
  37#include <rdma/ib_verbs.h>
  38
  39#include "iw_cxgb4.h"
  40
/* Version string reported via MODULE_VERSION() and the banner in c4iw_uld_add(). */
#define DRV_VERSION "0.1"

MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

/*
 * When zero (the default), c4iw_alloc() sets db_fc_threshold to 100000 on
 * non-T4 adapters and logs "DB Flow Control Disabled.".
 */
static int allow_db_fc_on_t5;
module_param(allow_db_fc_on_t5, int, 0644);
MODULE_PARM_DESC(allow_db_fc_on_t5,
                 "Allow DB Flow Control on T5 (default = 0)");

/*
 * When zero (the default), c4iw_alloc() sets db_coalescing_threshold to -1
 * on non-T4 adapters and logs "DB Coalescing Disabled.".
 */
static int allow_db_coalescing_on_t5;
module_param(allow_db_coalescing_on_t5, int, 0644);
MODULE_PARM_DESC(allow_db_coalescing_on_t5,
                 "Allow DB Coalescing on T5 (default = 0)");
  57
/*
 * Per-adapter context allocated by c4iw_uld_add() and handed back as the
 * opaque handle to the rx and state-change callbacks.
 */
struct uld_ctx {
        struct list_head entry;         /* link in uld_ctx_list */
        struct cxgb4_lld_info lldi;     /* copy of the info passed to c4iw_uld_add() */
        struct c4iw_dev *dev;           /* NULL until c4iw_alloc() succeeds; reset by c4iw_dealloc() */
};

/* All known contexts; c4iw_uld_add() appends while holding dev_mutex. */
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);

/* Parent for the per-device debugfs directories; c4iw_alloc() skips debugfs when NULL. */
static struct dentry *c4iw_debugfs_root;
  68
/*
 * Per-open state of the "qps", "stags" and "eps" debugfs files.  The open
 * handler formats a text snapshot into @buf; debugfs_read() then serves
 * reads from it.
 */
struct c4iw_debugfs_data {
        struct c4iw_dev *devp;  /* device taken from inode->i_private at open */
        char *buf;              /* snapshot text */
        int bufsize;            /* allocated size of buf in bytes */
        int pos;                /* bytes written so far; also the length served to readers */
};
  75
  76static int count_idrs(int id, void *p, void *data)
  77{
  78        int *countp = data;
  79
  80        *countp = *countp + 1;
  81        return 0;
  82}
  83
  84static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
  85                            loff_t *ppos)
  86{
  87        struct c4iw_debugfs_data *d = file->private_data;
  88
  89        return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
  90}
  91
  92static int dump_qp(int id, void *p, void *data)
  93{
  94        struct c4iw_qp *qp = p;
  95        struct c4iw_debugfs_data *qpd = data;
  96        int space;
  97        int cc;
  98
  99        if (id != qp->wq.sq.qid)
 100                return 0;
 101
 102        space = qpd->bufsize - qpd->pos - 1;
 103        if (space == 0)
 104                return 1;
 105
 106        if (qp->ep)
 107                cc = snprintf(qpd->buf + qpd->pos, space,
 108                             "qp sq id %u rq id %u state %u onchip %u "
 109                             "ep tid %u state %u %pI4:%u->%pI4:%u\n",
 110                             qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
 111                             qp->wq.sq.flags & T4_SQ_ONCHIP,
 112                             qp->ep->hwtid, (int)qp->ep->com.state,
 113                             &qp->ep->com.local_addr.sin_addr.s_addr,
 114                             ntohs(qp->ep->com.local_addr.sin_port),
 115                             &qp->ep->com.remote_addr.sin_addr.s_addr,
 116                             ntohs(qp->ep->com.remote_addr.sin_port));
 117        else
 118                cc = snprintf(qpd->buf + qpd->pos, space,
 119                             "qp sq id %u rq id %u state %u onchip %u\n",
 120                              qp->wq.sq.qid, qp->wq.rq.qid,
 121                              (int)qp->attr.state,
 122                              qp->wq.sq.flags & T4_SQ_ONCHIP);
 123        if (cc < space)
 124                qpd->pos += cc;
 125        return 0;
 126}
 127
 128static int qp_release(struct inode *inode, struct file *file)
 129{
 130        struct c4iw_debugfs_data *qpd = file->private_data;
 131        if (!qpd) {
 132                printk(KERN_INFO "%s null qpd?\n", __func__);
 133                return 0;
 134        }
 135        vfree(qpd->buf);
 136        kfree(qpd);
 137        return 0;
 138}
 139
 140static int qp_open(struct inode *inode, struct file *file)
 141{
 142        struct c4iw_debugfs_data *qpd;
 143        int ret = 0;
 144        int count = 1;
 145
 146        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
 147        if (!qpd) {
 148                ret = -ENOMEM;
 149                goto out;
 150        }
 151        qpd->devp = inode->i_private;
 152        qpd->pos = 0;
 153
 154        spin_lock_irq(&qpd->devp->lock);
 155        idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
 156        spin_unlock_irq(&qpd->devp->lock);
 157
 158        qpd->bufsize = count * 128;
 159        qpd->buf = vmalloc(qpd->bufsize);
 160        if (!qpd->buf) {
 161                ret = -ENOMEM;
 162                goto err1;
 163        }
 164
 165        spin_lock_irq(&qpd->devp->lock);
 166        idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
 167        spin_unlock_irq(&qpd->devp->lock);
 168
 169        qpd->buf[qpd->pos++] = 0;
 170        file->private_data = qpd;
 171        goto out;
 172err1:
 173        kfree(qpd);
 174out:
 175        return ret;
 176}
 177
/* File operations for the per-device "qps" debugfs file (snapshot built at open). */
static const struct file_operations qp_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = qp_open,
        .release = qp_release,
        .read    = debugfs_read,
        .llseek  = default_llseek,
};
 185
 186static int dump_stag(int id, void *p, void *data)
 187{
 188        struct c4iw_debugfs_data *stagd = data;
 189        int space;
 190        int cc;
 191
 192        space = stagd->bufsize - stagd->pos - 1;
 193        if (space == 0)
 194                return 1;
 195
 196        cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8);
 197        if (cc < space)
 198                stagd->pos += cc;
 199        return 0;
 200}
 201
 202static int stag_release(struct inode *inode, struct file *file)
 203{
 204        struct c4iw_debugfs_data *stagd = file->private_data;
 205        if (!stagd) {
 206                printk(KERN_INFO "%s null stagd?\n", __func__);
 207                return 0;
 208        }
 209        kfree(stagd->buf);
 210        kfree(stagd);
 211        return 0;
 212}
 213
 214static int stag_open(struct inode *inode, struct file *file)
 215{
 216        struct c4iw_debugfs_data *stagd;
 217        int ret = 0;
 218        int count = 1;
 219
 220        stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
 221        if (!stagd) {
 222                ret = -ENOMEM;
 223                goto out;
 224        }
 225        stagd->devp = inode->i_private;
 226        stagd->pos = 0;
 227
 228        spin_lock_irq(&stagd->devp->lock);
 229        idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
 230        spin_unlock_irq(&stagd->devp->lock);
 231
 232        stagd->bufsize = count * sizeof("0x12345678\n");
 233        stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL);
 234        if (!stagd->buf) {
 235                ret = -ENOMEM;
 236                goto err1;
 237        }
 238
 239        spin_lock_irq(&stagd->devp->lock);
 240        idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
 241        spin_unlock_irq(&stagd->devp->lock);
 242
 243        stagd->buf[stagd->pos++] = 0;
 244        file->private_data = stagd;
 245        goto out;
 246err1:
 247        kfree(stagd);
 248out:
 249        return ret;
 250}
 251
/* File operations for the per-device "stags" debugfs file (snapshot built at open). */
static const struct file_operations stag_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = stag_open,
        .release = stag_release,
        .read    = debugfs_read,
        .llseek  = default_llseek,
};
 259
/*
 * Printable names indexed by dev->db_state in stats_show().
 * NOTE(review): the index is not range-checked; assumes db_state only ever
 * takes the three values listed here -- confirm against the enum.
 */
static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};

/*
 * seq_file show routine for the "stats" debugfs file: prints the
 * total/current/max/fail counters of each resource class followed by the
 * doorbell and offload-connection counters.
 *
 * The counters are read without taking rdev.stats.lock.
 * Always returns 0.
 */
static int stats_show(struct seq_file *seq, void *v)
{
        struct c4iw_dev *dev = seq->private;

        seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
                   "Max", "Fail");
        seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
                        dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
        seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
                        dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
                        dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
        seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
                        dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
        seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
                        dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
        seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
                        dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
        seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
        seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
        seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
        seq_printf(seq, " DB State: %s Transitions %llu\n",
                   db_state_str[dev->db_state],
                   dev->rdev.stats.db_state_transitions);
        seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
        seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
                   dev->rdev.stats.act_ofld_conn_fails);
        seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
                   dev->rdev.stats.pas_ofld_conn_fails);
        return 0;
}
 299
 300static int stats_open(struct inode *inode, struct file *file)
 301{
 302        return single_open(file, stats_show, inode->i_private);
 303}
 304
/*
 * .write for the "stats" file: any write resets the max/fail counters of
 * every resource class and the doorbell, TCAM and offload-connection
 * counters.  The "total" and "cur" fields are left untouched.
 *
 * The written data itself is ignored; the whole @count is reported as
 * consumed.  The reset runs under rdev.stats.lock.
 */
static ssize_t stats_clear(struct file *file, const char __user *buf,
                size_t count, loff_t *pos)
{
        /* The device was stashed as the seq_file private data by stats_open(). */
        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;

        mutex_lock(&dev->rdev.stats.lock);
        dev->rdev.stats.pd.max = 0;
        dev->rdev.stats.pd.fail = 0;
        dev->rdev.stats.qid.max = 0;
        dev->rdev.stats.qid.fail = 0;
        dev->rdev.stats.stag.max = 0;
        dev->rdev.stats.stag.fail = 0;
        dev->rdev.stats.pbl.max = 0;
        dev->rdev.stats.pbl.fail = 0;
        dev->rdev.stats.rqt.max = 0;
        dev->rdev.stats.rqt.fail = 0;
        dev->rdev.stats.ocqp.max = 0;
        dev->rdev.stats.ocqp.fail = 0;
        dev->rdev.stats.db_full = 0;
        dev->rdev.stats.db_empty = 0;
        dev->rdev.stats.db_drop = 0;
        dev->rdev.stats.db_state_transitions = 0;
        dev->rdev.stats.tcam_full = 0;
        dev->rdev.stats.act_ofld_conn_fails = 0;
        dev->rdev.stats.pas_ofld_conn_fails = 0;
        mutex_unlock(&dev->rdev.stats.lock);
        return count;
}
 333
/*
 * File operations for the per-device "stats" debugfs file: reads go
 * through the seq_file helpers, writes clear the counters (stats_clear()).
 */
static const struct file_operations stats_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = stats_open,
        .release = single_release,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .write   = stats_clear,
};
 342
 343static int dump_ep(int id, void *p, void *data)
 344{
 345        struct c4iw_ep *ep = p;
 346        struct c4iw_debugfs_data *epd = data;
 347        int space;
 348        int cc;
 349
 350        space = epd->bufsize - epd->pos - 1;
 351        if (space == 0)
 352                return 1;
 353
 354        cc = snprintf(epd->buf + epd->pos, space,
 355                        "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx "
 356                        "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n",
 357                        ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state,
 358                        ep->com.flags, ep->com.history, ep->hwtid, ep->atid,
 359                        &ep->com.local_addr.sin_addr.s_addr,
 360                        ntohs(ep->com.local_addr.sin_port),
 361                        &ep->com.remote_addr.sin_addr.s_addr,
 362                        ntohs(ep->com.remote_addr.sin_port));
 363        if (cc < space)
 364                epd->pos += cc;
 365        return 0;
 366}
 367
 368static int dump_listen_ep(int id, void *p, void *data)
 369{
 370        struct c4iw_listen_ep *ep = p;
 371        struct c4iw_debugfs_data *epd = data;
 372        int space;
 373        int cc;
 374
 375        space = epd->bufsize - epd->pos - 1;
 376        if (space == 0)
 377                return 1;
 378
 379        cc = snprintf(epd->buf + epd->pos, space,
 380                        "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d "
 381                        "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state,
 382                        ep->com.flags, ep->stid, ep->backlog,
 383                        &ep->com.local_addr.sin_addr.s_addr,
 384                        ntohs(ep->com.local_addr.sin_port));
 385        if (cc < space)
 386                epd->pos += cc;
 387        return 0;
 388}
 389
 390static int ep_release(struct inode *inode, struct file *file)
 391{
 392        struct c4iw_debugfs_data *epd = file->private_data;
 393        if (!epd) {
 394                pr_info("%s null qpd?\n", __func__);
 395                return 0;
 396        }
 397        vfree(epd->buf);
 398        kfree(epd);
 399        return 0;
 400}
 401
 402static int ep_open(struct inode *inode, struct file *file)
 403{
 404        struct c4iw_debugfs_data *epd;
 405        int ret = 0;
 406        int count = 1;
 407
 408        epd = kmalloc(sizeof(*epd), GFP_KERNEL);
 409        if (!epd) {
 410                ret = -ENOMEM;
 411                goto out;
 412        }
 413        epd->devp = inode->i_private;
 414        epd->pos = 0;
 415
 416        spin_lock_irq(&epd->devp->lock);
 417        idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
 418        idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
 419        idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 420        spin_unlock_irq(&epd->devp->lock);
 421
 422        epd->bufsize = count * 160;
 423        epd->buf = vmalloc(epd->bufsize);
 424        if (!epd->buf) {
 425                ret = -ENOMEM;
 426                goto err1;
 427        }
 428
 429        spin_lock_irq(&epd->devp->lock);
 430        idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
 431        idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
 432        idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
 433        spin_unlock_irq(&epd->devp->lock);
 434
 435        file->private_data = epd;
 436        goto out;
 437err1:
 438        kfree(epd);
 439out:
 440        return ret;
 441}
 442
 443static const struct file_operations ep_debugfs_fops = {
 444        .owner   = THIS_MODULE,
 445        .open    = ep_open,
 446        .release = ep_release,
 447        .read    = debugfs_read,
 448};
 449
 450static int setup_debugfs(struct c4iw_dev *devp)
 451{
 452        struct dentry *de;
 453
 454        if (!devp->debugfs_root)
 455                return -1;
 456
 457        de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root,
 458                                 (void *)devp, &qp_debugfs_fops);
 459        if (de && de->d_inode)
 460                de->d_inode->i_size = 4096;
 461
 462        de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root,
 463                                 (void *)devp, &stag_debugfs_fops);
 464        if (de && de->d_inode)
 465                de->d_inode->i_size = 4096;
 466
 467        de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
 468                        (void *)devp, &stats_debugfs_fops);
 469        if (de && de->d_inode)
 470                de->d_inode->i_size = 4096;
 471
 472        de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root,
 473                        (void *)devp, &ep_debugfs_fops);
 474        if (de && de->d_inode)
 475                de->d_inode->i_size = 4096;
 476
 477        return 0;
 478}
 479
 480void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 481                               struct c4iw_dev_ucontext *uctx)
 482{
 483        struct list_head *pos, *nxt;
 484        struct c4iw_qid_list *entry;
 485
 486        mutex_lock(&uctx->lock);
 487        list_for_each_safe(pos, nxt, &uctx->qpids) {
 488                entry = list_entry(pos, struct c4iw_qid_list, entry);
 489                list_del_init(&entry->entry);
 490                if (!(entry->qid & rdev->qpmask)) {
 491                        c4iw_put_resource(&rdev->resource.qid_table,
 492                                          entry->qid);
 493                        mutex_lock(&rdev->stats.lock);
 494                        rdev->stats.qid.cur -= rdev->qpmask + 1;
 495                        mutex_unlock(&rdev->stats.lock);
 496                }
 497                kfree(entry);
 498        }
 499
 500        list_for_each_safe(pos, nxt, &uctx->qpids) {
 501                entry = list_entry(pos, struct c4iw_qid_list, entry);
 502                list_del_init(&entry->entry);
 503                kfree(entry);
 504        }
 505        mutex_unlock(&uctx->lock);
 506}
 507
 508void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 509                            struct c4iw_dev_ucontext *uctx)
 510{
 511        INIT_LIST_HEAD(&uctx->qpids);
 512        INIT_LIST_HEAD(&uctx->cqids);
 513        mutex_init(&uctx->lock);
 514}
 515
/*
 * Bring up the resource-management side of an rdev: derive the doorbell
 * shift/mask values from the lower-level driver info, seed the "total"
 * statistics, and create the resource tables and the PBL, RQT and on-chip
 * QP pools.
 *
 * Returns 0 on success, -EINVAL when there are no stags, or the error of
 * the first step that failed; on failure everything created so far is torn
 * down again.
 *
 * Caller takes care of locking if needed.
 */
static int c4iw_rdev_open(struct c4iw_rdev *rdev)
{
        int err;

        c4iw_init_dev_ucontext(rdev, &rdev->uctx);

        /*
         * qpshift is the number of bits to shift the qpid left in order
         * to get the correct address of the doorbell for that qp.
         */
        rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
        rdev->qpmask = rdev->lldi.udb_density - 1;
        rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
        rdev->cqmask = rdev->lldi.ucq_density - 1;
        PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
             "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
             "qp qid start %u size %u cq qid start %u size %u\n",
             __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
             rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
             rdev->lldi.vr->pbl.start,
             rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
             rdev->lldi.vr->rq.size,
             rdev->lldi.vr->qp.start,
             rdev->lldi.vr->qp.size,
             rdev->lldi.vr->cq.start,
             rdev->lldi.vr->cq.size);
        PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
             "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
             (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
             (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
             rdev->lldi.db_reg,
             rdev->lldi.gts_reg,
             rdev->qpshift, rdev->qpmask,
             rdev->cqshift, rdev->cqmask);

        /* Nothing can be registered without stags. */
        if (c4iw_num_stags(rdev) == 0) {
                err = -EINVAL;
                goto err1;
        }

        /* The "total" columns shown by the stats debugfs file. */
        rdev->stats.pd.total = T4_MAX_NUM_PD;
        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
        rdev->stats.qid.total = rdev->lldi.vr->qp.size;

        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing resources\n", err);
                goto err1;
        }
        err = c4iw_pblpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
                goto err2;
        }
        err = c4iw_rqtpool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
                goto err3;
        }
        err = c4iw_ocqp_pool_create(rdev);
        if (err) {
                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
                goto err4;
        }
        return 0;
        /* Unwind in reverse order of creation. */
err4:
        c4iw_rqtpool_destroy(rdev);
err3:
        c4iw_pblpool_destroy(rdev);
err2:
        c4iw_destroy_resource(&rdev->resource);
err1:
        return err;
}
 594
/*
 * Undo c4iw_rdev_open(): destroy the PBL and RQT pools and the resource
 * tables.
 *
 * NOTE(review): c4iw_rdev_open() also calls c4iw_ocqp_pool_create(), but
 * no matching teardown happens here -- confirm whether the on-chip QP pool
 * is released elsewhere or is leaked.
 */
static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
        c4iw_pblpool_destroy(rdev);
        c4iw_rqtpool_destroy(rdev);
        c4iw_destroy_resource(&rdev->resource);
}
 601
/*
 * Free everything c4iw_alloc() set up for @ctx->dev: close the rdev,
 * destroy the six idrs, unmap the on-chip queue window and release the ib
 * device.  ctx->dev is cleared afterwards so a later CXGB4_STATE_UP can
 * allocate a fresh device.
 */
static void c4iw_dealloc(struct uld_ctx *ctx)
{
        c4iw_rdev_close(&ctx->dev->rdev);
        idr_destroy(&ctx->dev->cqidr);
        idr_destroy(&ctx->dev->qpidr);
        idr_destroy(&ctx->dev->mmidr);
        idr_destroy(&ctx->dev->hwtid_idr);
        idr_destroy(&ctx->dev->stid_idr);
        idr_destroy(&ctx->dev->atid_idr);
        iounmap(ctx->dev->rdev.oc_mw_kva);
        /* Must come last: the device structure is gone after this call. */
        ib_dealloc_device(&ctx->dev->ibdev);
        ctx->dev = NULL;
}
 615
/*
 * Take a registered device out of service: unregister it, then free it
 * with c4iw_dealloc().  Callers check that ctx->dev is non-NULL first.
 */
static void c4iw_remove(struct uld_ctx *ctx)
{
        PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
        c4iw_unregister_device(ctx->dev);
        c4iw_dealloc(ctx);
}
 622
 623static int rdma_supported(const struct cxgb4_lld_info *infop)
 624{
 625        return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
 626               infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
 627               infop->vr->cq.size > 0;
 628}
 629
 630static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 631{
 632        struct c4iw_dev *devp;
 633        int ret;
 634
 635        if (!rdma_supported(infop)) {
 636                printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
 637                       pci_name(infop->pdev));
 638                return ERR_PTR(-ENOSYS);
 639        }
 640        if (!ocqp_supported(infop))
 641                pr_info("%s: On-Chip Queues not supported on this device.\n",
 642                        pci_name(infop->pdev));
 643
 644        if (!is_t4(infop->adapter_type)) {
 645                if (!allow_db_fc_on_t5) {
 646                        db_fc_threshold = 100000;
 647                        pr_info("DB Flow Control Disabled.\n");
 648                }
 649
 650                if (!allow_db_coalescing_on_t5) {
 651                        db_coalescing_threshold = -1;
 652                        pr_info("DB Coalescing Disabled.\n");
 653                }
 654        }
 655
 656        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 657        if (!devp) {
 658                printk(KERN_ERR MOD "Cannot allocate ib device\n");
 659                return ERR_PTR(-ENOMEM);
 660        }
 661        devp->rdev.lldi = *infop;
 662
 663        devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
 664                (pci_resource_len(devp->rdev.lldi.pdev, 2) -
 665                 roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
 666        devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
 667                                               devp->rdev.lldi.vr->ocq.size);
 668
 669        PDBG(KERN_INFO MOD "ocq memory: "
 670               "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
 671               devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
 672               devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
 673
 674        ret = c4iw_rdev_open(&devp->rdev);
 675        if (ret) {
 676                printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
 677                ib_dealloc_device(&devp->ibdev);
 678                return ERR_PTR(ret);
 679        }
 680
 681        idr_init(&devp->cqidr);
 682        idr_init(&devp->qpidr);
 683        idr_init(&devp->mmidr);
 684        idr_init(&devp->hwtid_idr);
 685        idr_init(&devp->stid_idr);
 686        idr_init(&devp->atid_idr);
 687        spin_lock_init(&devp->lock);
 688        mutex_init(&devp->rdev.stats.lock);
 689        mutex_init(&devp->db_mutex);
 690
 691        if (c4iw_debugfs_root) {
 692                devp->debugfs_root = debugfs_create_dir(
 693                                        pci_name(devp->rdev.lldi.pdev),
 694                                        c4iw_debugfs_root);
 695                setup_debugfs(devp);
 696        }
 697        return devp;
 698}
 699
 700static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
 701{
 702        struct uld_ctx *ctx;
 703        static int vers_printed;
 704        int i;
 705
 706        if (!vers_printed++)
 707                pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
 708                        DRV_VERSION);
 709
 710        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
 711        if (!ctx) {
 712                ctx = ERR_PTR(-ENOMEM);
 713                goto out;
 714        }
 715        ctx->lldi = *infop;
 716
 717        PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
 718             __func__, pci_name(ctx->lldi.pdev),
 719             ctx->lldi.nchan, ctx->lldi.nrxq,
 720             ctx->lldi.ntxq, ctx->lldi.nports);
 721
 722        mutex_lock(&dev_mutex);
 723        list_add_tail(&ctx->entry, &uld_ctx_list);
 724        mutex_unlock(&dev_mutex);
 725
 726        for (i = 0; i < ctx->lldi.nrxq; i++)
 727                PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
 728out:
 729        return ctx;
 730}
 731
 732static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
 733                                                 const __be64 *rsp,
 734                                                 u32 pktshift)
 735{
 736        struct sk_buff *skb;
 737
 738        /*
 739         * Allocate space for cpl_pass_accept_req which will be synthesized by
 740         * driver. Once the driver synthesizes the request the skb will go
 741         * through the regular cpl_pass_accept_req processing.
 742         * The math here assumes sizeof cpl_pass_accept_req >= sizeof
 743         * cpl_rx_pkt.
 744         */
 745        skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
 746                        sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
 747        if (unlikely(!skb))
 748                return NULL;
 749
 750         __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
 751                   sizeof(struct rss_header) - pktshift);
 752
 753        /*
 754         * This skb will contain:
 755         *   rss_header from the rspq descriptor (1 flit)
 756         *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
 757         *   space for the difference between the size of an
 758         *      rx_pkt and pass_accept_req cpl (1 flit)
 759         *   the packet data from the gl
 760         */
 761        skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
 762                                sizeof(struct rss_header));
 763        skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
 764                                       sizeof(struct cpl_pass_accept_req),
 765                                       gl->va + pktshift,
 766                                       gl->tot_len - pktshift);
 767        return skb;
 768}
 769
 770static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
 771                           const __be64 *rsp)
 772{
 773        unsigned int opcode = *(u8 *)rsp;
 774        struct sk_buff *skb;
 775
 776        if (opcode != CPL_RX_PKT)
 777                goto out;
 778
 779        skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift);
 780        if (skb == NULL)
 781                goto out;
 782
 783        if (c4iw_handlers[opcode] == NULL) {
 784                pr_info("%s no handler opcode 0x%x...\n", __func__,
 785                       opcode);
 786                kfree_skb(skb);
 787                goto out;
 788        }
 789        c4iw_handlers[opcode](dev, skb);
 790        return 1;
 791out:
 792        return 0;
 793}
 794
 795static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
 796                        const struct pkt_gl *gl)
 797{
 798        struct uld_ctx *ctx = handle;
 799        struct c4iw_dev *dev = ctx->dev;
 800        struct sk_buff *skb;
 801        u8 opcode;
 802
 803        if (gl == NULL) {
 804                /* omit RSS and rsp_ctrl at end of descriptor */
 805                unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
 806
 807                skb = alloc_skb(256, GFP_ATOMIC);
 808                if (!skb)
 809                        goto nomem;
 810                __skb_put(skb, len);
 811                skb_copy_to_linear_data(skb, &rsp[1], len);
 812        } else if (gl == CXGB4_MSG_AN) {
 813                const struct rsp_ctrl *rc = (void *)rsp;
 814
 815                u32 qid = be32_to_cpu(rc->pldbuflen_qid);
 816                c4iw_ev_handler(dev, qid);
 817                return 0;
 818        } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
 819                if (recv_rx_pkt(dev, gl, rsp))
 820                        return 0;
 821
 822                pr_info("%s: unexpected FL contents at %p, " \
 823                       "RSS %#llx, FL %#llx, len %u\n",
 824                       pci_name(ctx->lldi.pdev), gl->va,
 825                       (unsigned long long)be64_to_cpu(*rsp),
 826                       (unsigned long long)be64_to_cpu(
 827                       *(__force __be64 *)gl->va),
 828                       gl->tot_len);
 829
 830                return 0;
 831        } else {
 832                skb = cxgb4_pktgl_to_skb(gl, 128, 128);
 833                if (unlikely(!skb))
 834                        goto nomem;
 835        }
 836
 837        opcode = *(u8 *)rsp;
 838        if (c4iw_handlers[opcode])
 839                c4iw_handlers[opcode](dev, skb);
 840        else
 841                pr_info("%s no handler opcode 0x%x...\n", __func__,
 842                       opcode);
 843
 844        return 0;
 845nomem:
 846        return -1;
 847}
 848
 849static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
 850{
 851        struct uld_ctx *ctx = handle;
 852
 853        PDBG("%s new_state %u\n", __func__, new_state);
 854        switch (new_state) {
 855        case CXGB4_STATE_UP:
 856                printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
 857                if (!ctx->dev) {
 858                        int ret;
 859
 860                        ctx->dev = c4iw_alloc(&ctx->lldi);
 861                        if (IS_ERR(ctx->dev)) {
 862                                printk(KERN_ERR MOD
 863                                       "%s: initialization failed: %ld\n",
 864                                       pci_name(ctx->lldi.pdev),
 865                                       PTR_ERR(ctx->dev));
 866                                ctx->dev = NULL;
 867                                break;
 868                        }
 869                        ret = c4iw_register_device(ctx->dev);
 870                        if (ret) {
 871                                printk(KERN_ERR MOD
 872                                       "%s: RDMA registration failed: %d\n",
 873                                       pci_name(ctx->lldi.pdev), ret);
 874                                c4iw_dealloc(ctx);
 875                        }
 876                }
 877                break;
 878        case CXGB4_STATE_DOWN:
 879                printk(KERN_INFO MOD "%s: Down\n",
 880                       pci_name(ctx->lldi.pdev));
 881                if (ctx->dev)
 882                        c4iw_remove(ctx);
 883                break;
 884        case CXGB4_STATE_START_RECOVERY:
 885                printk(KERN_INFO MOD "%s: Fatal Error\n",
 886                       pci_name(ctx->lldi.pdev));
 887                if (ctx->dev) {
 888                        struct ib_event event;
 889
 890                        ctx->dev->rdev.flags |= T4_FATAL_ERROR;
 891                        memset(&event, 0, sizeof event);
 892                        event.event  = IB_EVENT_DEVICE_FATAL;
 893                        event.device = &ctx->dev->ibdev;
 894                        ib_dispatch_event(&event);
 895                        c4iw_remove(ctx);
 896                }
 897                break;
 898        case CXGB4_STATE_DETACH:
 899                printk(KERN_INFO MOD "%s: Detach\n",
 900                       pci_name(ctx->lldi.pdev));
 901                if (ctx->dev)
 902                        c4iw_remove(ctx);
 903                break;
 904        }
 905        return 0;
 906}
 907
 908static int disable_qp_db(int id, void *p, void *data)
 909{
 910        struct c4iw_qp *qp = p;
 911
 912        t4_disable_wq_db(&qp->wq);
 913        return 0;
 914}
 915
 916static void stop_queues(struct uld_ctx *ctx)
 917{
 918        spin_lock_irq(&ctx->dev->lock);
 919        if (ctx->dev->db_state == NORMAL) {
 920                ctx->dev->rdev.stats.db_state_transitions++;
 921                ctx->dev->db_state = FLOW_CONTROL;
 922                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
 923        }
 924        spin_unlock_irq(&ctx->dev->lock);
 925}
 926
 927static int enable_qp_db(int id, void *p, void *data)
 928{
 929        struct c4iw_qp *qp = p;
 930
 931        t4_enable_wq_db(&qp->wq);
 932        return 0;
 933}
 934
 935static void resume_queues(struct uld_ctx *ctx)
 936{
 937        spin_lock_irq(&ctx->dev->lock);
 938        if (ctx->dev->qpcnt <= db_fc_threshold &&
 939            ctx->dev->db_state == FLOW_CONTROL) {
 940                ctx->dev->db_state = NORMAL;
 941                ctx->dev->rdev.stats.db_state_transitions++;
 942                idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
 943        }
 944        spin_unlock_irq(&ctx->dev->lock);
 945}
 946
/*
 * Snapshot of QPs collected during doorbell recovery: add_and_ref_qp()
 * appends to it, recover_lost_dbs() and deref_qps() walk it.
 */
struct qp_list {
	unsigned idx;		/* number of entries currently stored in qps[] */
	struct c4iw_qp **qps;	/* array of QP pointers, allocated by the user of the list */
};
 951
 952static int add_and_ref_qp(int id, void *p, void *data)
 953{
 954        struct qp_list *qp_listp = data;
 955        struct c4iw_qp *qp = p;
 956
 957        c4iw_qp_add_ref(&qp->ibqp);
 958        qp_listp->qps[qp_listp->idx++] = qp;
 959        return 0;
 960}
 961
 962static int count_qps(int id, void *p, void *data)
 963{
 964        unsigned *countp = data;
 965        (*countp)++;
 966        return 0;
 967}
 968
 969static void deref_qps(struct qp_list qp_list)
 970{
 971        int idx;
 972
 973        for (idx = 0; idx < qp_list.idx; idx++)
 974                c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
 975}
 976
 977static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
 978{
 979        int idx;
 980        int ret;
 981
 982        for (idx = 0; idx < qp_list->idx; idx++) {
 983                struct c4iw_qp *qp = qp_list->qps[idx];
 984
 985                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
 986                                          qp->wq.sq.qid,
 987                                          t4_sq_host_wq_pidx(&qp->wq),
 988                                          t4_sq_wq_size(&qp->wq));
 989                if (ret) {
 990                        printk(KERN_ERR MOD "%s: Fatal error - "
 991                               "DB overflow recovery failed - "
 992                               "error syncing SQ qid %u\n",
 993                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
 994                        return;
 995                }
 996
 997                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
 998                                          qp->wq.rq.qid,
 999                                          t4_rq_host_wq_pidx(&qp->wq),
1000                                          t4_rq_wq_size(&qp->wq));
1001
1002                if (ret) {
1003                        printk(KERN_ERR MOD "%s: Fatal error - "
1004                               "DB overflow recovery failed - "
1005                               "error syncing RQ qid %u\n",
1006                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
1007                        return;
1008                }
1009
1010                /* Wait for the dbfifo to drain */
1011                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
1012                        set_current_state(TASK_UNINTERRUPTIBLE);
1013                        schedule_timeout(usecs_to_jiffies(10));
1014                }
1015        }
1016}
1017
1018static void recover_queues(struct uld_ctx *ctx)
1019{
1020        int count = 0;
1021        struct qp_list qp_list;
1022        int ret;
1023
1024        /* lock out kernel db ringers */
1025        mutex_lock(&ctx->dev->db_mutex);
1026
1027        /* put all queues in to recovery mode */
1028        spin_lock_irq(&ctx->dev->lock);
1029        ctx->dev->db_state = RECOVERY;
1030        ctx->dev->rdev.stats.db_state_transitions++;
1031        idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
1032        spin_unlock_irq(&ctx->dev->lock);
1033
1034        /* slow everybody down */
1035        set_current_state(TASK_UNINTERRUPTIBLE);
1036        schedule_timeout(usecs_to_jiffies(1000));
1037
1038        /* Wait for the dbfifo to completely drain. */
1039        while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
1040                set_current_state(TASK_UNINTERRUPTIBLE);
1041                schedule_timeout(usecs_to_jiffies(10));
1042        }
1043
1044        /* flush the SGE contexts */
1045        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
1046        if (ret) {
1047                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1048                       pci_name(ctx->lldi.pdev));
1049                goto out;
1050        }
1051
1052        /* Count active queues so we can build a list of queues to recover */
1053        spin_lock_irq(&ctx->dev->lock);
1054        idr_for_each(&ctx->dev->qpidr, count_qps, &count);
1055
1056        qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
1057        if (!qp_list.qps) {
1058                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1059                       pci_name(ctx->lldi.pdev));
1060                spin_unlock_irq(&ctx->dev->lock);
1061                goto out;
1062        }
1063        qp_list.idx = 0;
1064
1065        /* add and ref each qp so it doesn't get freed */
1066        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
1067
1068        spin_unlock_irq(&ctx->dev->lock);
1069
1070        /* now traverse the list in a safe context to recover the db state*/
1071        recover_lost_dbs(ctx, &qp_list);
1072
1073        /* we're almost done!  deref the qps and clean up */
1074        deref_qps(qp_list);
1075        kfree(qp_list.qps);
1076
1077        /* Wait for the dbfifo to completely drain again */
1078        while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
1079                set_current_state(TASK_UNINTERRUPTIBLE);
1080                schedule_timeout(usecs_to_jiffies(10));
1081        }
1082
1083        /* resume the queues */
1084        spin_lock_irq(&ctx->dev->lock);
1085        if (ctx->dev->qpcnt > db_fc_threshold)
1086                ctx->dev->db_state = FLOW_CONTROL;
1087        else {
1088                ctx->dev->db_state = NORMAL;
1089                idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
1090        }
1091        ctx->dev->rdev.stats.db_state_transitions++;
1092        spin_unlock_irq(&ctx->dev->lock);
1093
1094out:
1095        /* start up kernel db ringers again */
1096        mutex_unlock(&ctx->dev->db_mutex);
1097}
1098
1099static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
1100{
1101        struct uld_ctx *ctx = handle;
1102
1103        switch (control) {
1104        case CXGB4_CONTROL_DB_FULL:
1105                stop_queues(ctx);
1106                mutex_lock(&ctx->dev->rdev.stats.lock);
1107                ctx->dev->rdev.stats.db_full++;
1108                mutex_unlock(&ctx->dev->rdev.stats.lock);
1109                break;
1110        case CXGB4_CONTROL_DB_EMPTY:
1111                resume_queues(ctx);
1112                mutex_lock(&ctx->dev->rdev.stats.lock);
1113                ctx->dev->rdev.stats.db_empty++;
1114                mutex_unlock(&ctx->dev->rdev.stats.lock);
1115                break;
1116        case CXGB4_CONTROL_DB_DROP:
1117                recover_queues(ctx);
1118                mutex_lock(&ctx->dev->rdev.stats.lock);
1119                ctx->dev->rdev.stats.db_drop++;
1120                mutex_unlock(&ctx->dev->rdev.stats.lock);
1121                break;
1122        default:
1123                printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
1124                       pci_name(ctx->lldi.pdev), control);
1125                break;
1126        }
1127        return 0;
1128}
1129
/*
 * Callback table handed to cxgb4_register_uld() at module init; it wires
 * this driver's add, receive, state-change and doorbell-control handlers
 * into the cxgb4 driver.
 */
static struct cxgb4_uld_info c4iw_uld_info = {
	.name = DRV_NAME,
	.add = c4iw_uld_add,
	.rx_handler = c4iw_uld_rx_handler,
	.state_change = c4iw_uld_state_change,
	.control = c4iw_uld_control,
};
1137
1138static int __init c4iw_init_module(void)
1139{
1140        int err;
1141
1142        err = c4iw_cm_init();
1143        if (err)
1144                return err;
1145
1146        c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
1147        if (!c4iw_debugfs_root)
1148                printk(KERN_WARNING MOD
1149                       "could not create debugfs entry, continuing\n");
1150
1151        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
1152
1153        return 0;
1154}
1155
1156static void __exit c4iw_exit_module(void)
1157{
1158        struct uld_ctx *ctx, *tmp;
1159
1160        mutex_lock(&dev_mutex);
1161        list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
1162                if (ctx->dev)
1163                        c4iw_remove(ctx);
1164                kfree(ctx);
1165        }
1166        mutex_unlock(&dev_mutex);
1167        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
1168        c4iw_cm_term();
1169        debugfs_remove_recursive(c4iw_debugfs_root);
1170}
1171
/* Hook the init and exit routines into the kernel module loader. */
module_init(c4iw_init_module);
module_exit(c4iw_exit_module);
1174