linux/drivers/infiniband/hw/cxgb4/device.c
   1/*
   2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/moduleparam.h>
  34#include <linux/debugfs.h>
  35#include <linux/vmalloc.h>
  36#include <linux/math64.h>
  37
  38#include <rdma/ib_verbs.h>
  39
  40#include "iw_cxgb4.h"
  41
  42#define DRV_VERSION "0.1"
  43
  44MODULE_AUTHOR("Steve Wise");
  45MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
  46MODULE_LICENSE("Dual BSD/GPL");
  47MODULE_VERSION(DRV_VERSION);
  48
  49static int allow_db_fc_on_t5;
  50module_param(allow_db_fc_on_t5, int, 0644);
  51MODULE_PARM_DESC(allow_db_fc_on_t5,
  52                 "Allow DB Flow Control on T5 (default = 0)");
  53
  54static int allow_db_coalescing_on_t5;
  55module_param(allow_db_coalescing_on_t5, int, 0644);
  56MODULE_PARM_DESC(allow_db_coalescing_on_t5,
  57                 "Allow DB Coalescing on T5 (default = 0)");
  58
  59int c4iw_wr_log = 0;
  60module_param(c4iw_wr_log, int, 0444);
  61MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
  62
  63static int c4iw_wr_log_size_order = 12;
  64module_param(c4iw_wr_log_size_order, int, 0444);
  65MODULE_PARM_DESC(c4iw_wr_log_size_order,
  66                 "Number of entries (log2) in the work request timing log.");
  67
  68struct uld_ctx {
  69        struct list_head entry;
  70        struct cxgb4_lld_info lldi;
  71        struct c4iw_dev *dev;
  72};
  73
  74static LIST_HEAD(uld_ctx_list);
  75static DEFINE_MUTEX(dev_mutex);
  76
  77#define DB_FC_RESUME_SIZE 64
  78#define DB_FC_RESUME_DELAY 1
  79#define DB_FC_DRAIN_THRESH 0
  80
  81static struct dentry *c4iw_debugfs_root;
  82
  83struct c4iw_debugfs_data {
  84        struct c4iw_dev *devp;
  85        char *buf;
  86        int bufsize;
  87        int pos;
  88};
  89
  90static int count_idrs(int id, void *p, void *data)
  91{
  92        int *countp = data;
  93
  94        *countp = *countp + 1;
  95        return 0;
  96}
  97
  98static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
  99                            loff_t *ppos)
 100{
 101        struct c4iw_debugfs_data *d = file->private_data;
 102
 103        return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
 104}
 105
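/*
 * Record timing data for a completed work request in the circular
 * wr_log[] array: the SGE timestamp sampled now (at poll time), the
 * host wall-clock time, the SGE timestamp carried in the CQE, and the
 * timestamps saved when the WR was posted (from the SQ/RQ sw queue).
 */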
 106void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
 107{
 108        struct wr_log_entry le;
 109        int idx;
 110
 111        if (!wq->rdev->wr_log)
 112                return;
 113
 114        idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
 115                (wq->rdev->wr_log_size - 1);
 116        le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
 117        getnstimeofday(&le.poll_host_ts);
 118        le.valid = 1;
 119        le.cqe_sge_ts = CQE_TS(cqe);
 120        if (SQ_TYPE(cqe)) {
 121                le.qid = wq->sq.qid;
 122                le.opcode = CQE_OPCODE(cqe);
 123                le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
 124                le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
 125                le.wr_id = CQE_WRID_SQ_IDX(cqe);
 126        } else {
 127                le.qid = wq->rq.qid;
 128                le.opcode = FW_RI_RECEIVE;
 129                le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
 130                le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
 131                le.wr_id = CQE_WRID_MSN(cqe);
 132        }
 133        wq->rdev->wr_log[idx] = le;
 134}
 135
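/*
 * debugfs "wr_log" show routine: walk the circular log from the
 * current index and print each valid entry, with host-time deltas and
 * SGE timestamp deltas converted to nanoseconds via the core clock
 * period (cclk_ps).
 */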
 136static int wr_log_show(struct seq_file *seq, void *v)
 137{
 138        struct c4iw_dev *dev = seq->private;
 139        struct timespec prev_ts = {0, 0};
 140        struct wr_log_entry *lep;
 141        int prev_ts_set = 0;
 142        int idx, end;
 143
 144#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
 145
 146        idx = atomic_read(&dev->rdev.wr_log_idx) &
 147                (dev->rdev.wr_log_size - 1);
 148        end = idx - 1;
 149        if (end < 0)
 150                end = dev->rdev.wr_log_size - 1;
 151        lep = &dev->rdev.wr_log[idx];
 152        while (idx != end) {
 153                if (lep->valid) {
 154                        if (!prev_ts_set) {
 155                                prev_ts_set = 1;
 156                                prev_ts = lep->poll_host_ts;
 157                        }
 158                        seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
 159                                   "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
 160                                   "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
 161                                   "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
 162                                   "cqe_poll_delta_ns %llu\n",
 163                                   idx,
 164                                   timespec_sub(lep->poll_host_ts,
 165                                                prev_ts).tv_sec,
 166                                   timespec_sub(lep->poll_host_ts,
 167                                                prev_ts).tv_nsec,
 168                                   lep->qid, lep->opcode,
 169                                   lep->opcode == FW_RI_RECEIVE ?
 170                                                        "msn" : "wrid",
 171                                   lep->wr_id,
 172                                   timespec_sub(lep->poll_host_ts,
 173                                                lep->post_host_ts).tv_sec,
 174                                   timespec_sub(lep->poll_host_ts,
 175                                                lep->post_host_ts).tv_nsec,
 176                                   lep->post_sge_ts, lep->cqe_sge_ts,
 177                                   lep->poll_sge_ts,
 178                                   ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
 179                                   ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
 180                        prev_ts = lep->poll_host_ts;
 181                }
 182                idx++;
 183                if (idx > (dev->rdev.wr_log_size - 1))
 184                        idx = 0;
 185                lep = &dev->rdev.wr_log[idx];
 186        }
 187#undef ts2ns
 188        return 0;
 189}
 190
 191static int wr_log_open(struct inode *inode, struct file *file)
 192{
 193        return single_open(file, wr_log_show, inode->i_private);
 194}
 195
 196static ssize_t wr_log_clear(struct file *file, const char __user *buf,
 197                            size_t count, loff_t *pos)
 198{
 199        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 200        int i;
 201
 202        if (dev->rdev.wr_log)
 203                for (i = 0; i < dev->rdev.wr_log_size; i++)
 204                        dev->rdev.wr_log[i].valid = 0;
 205        return count;
 206}
 207
 208static const struct file_operations wr_log_debugfs_fops = {
 209        .owner   = THIS_MODULE,
 210        .open    = wr_log_open,
 211        .release = single_release,
 212        .read    = seq_read,
 213        .llseek  = seq_lseek,
 214        .write   = wr_log_clear,
 215};
 216
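/*
 * idr_for_each() callback for the debugfs "qps" file: print one line
 * per QP (only when the idr id matches the SQ qid, so each QP is
 * dumped once), including the local/remote addresses when an endpoint
 * is attached.
 */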
 217static int dump_qp(int id, void *p, void *data)
 218{
 219        struct c4iw_qp *qp = p;
 220        struct c4iw_debugfs_data *qpd = data;
 221        int space;
 222        int cc;
 223
 224        if (id != qp->wq.sq.qid)
 225                return 0;
 226
 227        space = qpd->bufsize - qpd->pos - 1;
 228        if (space == 0)
 229                return 1;
 230
 231        if (qp->ep) {
 232                if (qp->ep->com.local_addr.ss_family == AF_INET) {
 233                        struct sockaddr_in *lsin = (struct sockaddr_in *)
 234                                &qp->ep->com.cm_id->local_addr;
 235                        struct sockaddr_in *rsin = (struct sockaddr_in *)
 236                                &qp->ep->com.cm_id->remote_addr;
 237                        struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 238                                &qp->ep->com.cm_id->m_local_addr;
 239                        struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
 240                                &qp->ep->com.cm_id->m_remote_addr;
 241
 242                        cc = snprintf(qpd->buf + qpd->pos, space,
 243                                      "rc qp sq id %u rq id %u state %u "
 244                                      "onchip %u ep tid %u state %u "
 245                                      "%pI4:%u/%u->%pI4:%u/%u\n",
 246                                      qp->wq.sq.qid, qp->wq.rq.qid,
 247                                      (int)qp->attr.state,
 248                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 249                                      qp->ep->hwtid, (int)qp->ep->com.state,
 250                                      &lsin->sin_addr, ntohs(lsin->sin_port),
 251                                      ntohs(mapped_lsin->sin_port),
 252                                      &rsin->sin_addr, ntohs(rsin->sin_port),
 253                                      ntohs(mapped_rsin->sin_port));
 254                } else {
 255                        struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 256                                &qp->ep->com.cm_id->local_addr;
 257                        struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 258                                &qp->ep->com.cm_id->remote_addr;
 259                        struct sockaddr_in6 *mapped_lsin6 =
 260                                (struct sockaddr_in6 *)
 261                                &qp->ep->com.cm_id->m_local_addr;
 262                        struct sockaddr_in6 *mapped_rsin6 =
 263                                (struct sockaddr_in6 *)
 264                                &qp->ep->com.cm_id->m_remote_addr;
 265
 266                        cc = snprintf(qpd->buf + qpd->pos, space,
 267                                      "rc qp sq id %u rq id %u state %u "
 268                                      "onchip %u ep tid %u state %u "
 269                                      "%pI6:%u/%u->%pI6:%u/%u\n",
 270                                      qp->wq.sq.qid, qp->wq.rq.qid,
 271                                      (int)qp->attr.state,
 272                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 273                                      qp->ep->hwtid, (int)qp->ep->com.state,
 274                                      &lsin6->sin6_addr,
 275                                      ntohs(lsin6->sin6_port),
 276                                      ntohs(mapped_lsin6->sin6_port),
 277                                      &rsin6->sin6_addr,
 278                                      ntohs(rsin6->sin6_port),
 279                                      ntohs(mapped_rsin6->sin6_port));
 280                }
 281        } else
 282                cc = snprintf(qpd->buf + qpd->pos, space,
 283                             "qp sq id %u rq id %u state %u onchip %u\n",
 284                              qp->wq.sq.qid, qp->wq.rq.qid,
 285                              (int)qp->attr.state,
 286                              qp->wq.sq.flags & T4_SQ_ONCHIP);
 287        if (cc < space)
 288                qpd->pos += cc;
 289        return 0;
 290}
 291
 292static int qp_release(struct inode *inode, struct file *file)
 293{
 294        struct c4iw_debugfs_data *qpd = file->private_data;
 295        if (!qpd) {
 296                printk(KERN_INFO "%s null qpd?\n", __func__);
 297                return 0;
 298        }
 299        vfree(qpd->buf);
 300        kfree(qpd);
 301        return 0;
 302}
 303
 304static int qp_open(struct inode *inode, struct file *file)
 305{
 306        struct c4iw_debugfs_data *qpd;
 307        int count = 1;
 308
 309        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
 310        if (!qpd)
 311                return -ENOMEM;
 312
 313        qpd->devp = inode->i_private;
 314        qpd->pos = 0;
 315
 316        spin_lock_irq(&qpd->devp->lock);
 317        idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
 318        spin_unlock_irq(&qpd->devp->lock);
 319
 320        qpd->bufsize = count * 180;
 321        qpd->buf = vmalloc(qpd->bufsize);
 322        if (!qpd->buf) {
 323                kfree(qpd);
 324                return -ENOMEM;
 325        }
 326
 327        spin_lock_irq(&qpd->devp->lock);
 328        idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
 329        spin_unlock_irq(&qpd->devp->lock);
 330
 331        qpd->buf[qpd->pos++] = 0;
 332        file->private_data = qpd;
 333        return 0;
 334}
 335
 336static const struct file_operations qp_debugfs_fops = {
 337        .owner   = THIS_MODULE,
 338        .open    = qp_open,
 339        .release = qp_release,
 340        .read    = debugfs_read,
 341        .llseek  = default_llseek,
 342};
 343
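/*
 * idr_for_each() callback for the debugfs "stags" file: read the TPT
 * entry for this stag index from adapter memory via cxgb4_read_tpte()
 * and print its decoded fields.
 */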
 344static int dump_stag(int id, void *p, void *data)
 345{
 346        struct c4iw_debugfs_data *stagd = data;
 347        int space;
 348        int cc;
 349        struct fw_ri_tpte tpte;
 350        int ret;
 351
 352        space = stagd->bufsize - stagd->pos - 1;
 353        if (space == 0)
 354                return 1;
 355
 356        ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8,
 357                              (__be32 *)&tpte);
 358        if (ret) {
 359                dev_err(&stagd->devp->rdev.lldi.pdev->dev,
 360                        "%s cxgb4_read_tpte err %d\n", __func__, ret);
 361                return ret;
 362        }
 363        cc = snprintf(stagd->buf + stagd->pos, space,
 364                      "stag: idx 0x%x valid %d key 0x%x state %d pdid %d "
 365                      "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
 366                      (u32)id<<8,
 367                      FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
 368                      FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
 369                      FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
 370                      FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
 371                      FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
 372                      FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
 373                      ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
 374                      ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
 375        if (cc < space)
 376                stagd->pos += cc;
 377        return 0;
 378}
 379
 380static int stag_release(struct inode *inode, struct file *file)
 381{
 382        struct c4iw_debugfs_data *stagd = file->private_data;
 383        if (!stagd) {
 384                printk(KERN_INFO "%s null stagd?\n", __func__);
 385                return 0;
 386        }
 387        vfree(stagd->buf);
 388        kfree(stagd);
 389        return 0;
 390}
 391
 392static int stag_open(struct inode *inode, struct file *file)
 393{
 394        struct c4iw_debugfs_data *stagd;
 395        int ret = 0;
 396        int count = 1;
 397
 398        stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
 399        if (!stagd) {
 400                ret = -ENOMEM;
 401                goto out;
 402        }
 403        stagd->devp = inode->i_private;
 404        stagd->pos = 0;
 405
 406        spin_lock_irq(&stagd->devp->lock);
 407        idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
 408        spin_unlock_irq(&stagd->devp->lock);
 409
 410        stagd->bufsize = count * 256;
 411        stagd->buf = vmalloc(stagd->bufsize);
 412        if (!stagd->buf) {
 413                ret = -ENOMEM;
 414                goto err1;
 415        }
 416
 417        spin_lock_irq(&stagd->devp->lock);
 418        idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
 419        spin_unlock_irq(&stagd->devp->lock);
 420
 421        stagd->buf[stagd->pos++] = 0;
 422        file->private_data = stagd;
 423        goto out;
 424err1:
 425        kfree(stagd);
 426out:
 427        return ret;
 428}
 429
 430static const struct file_operations stag_debugfs_fops = {
 431        .owner   = THIS_MODULE,
 432        .open    = stag_open,
 433        .release = stag_release,
 434        .read    = debugfs_read,
 435        .llseek  = default_llseek,
 436};
 437
 438static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
 439
 440static int stats_show(struct seq_file *seq, void *v)
 441{
 442        struct c4iw_dev *dev = seq->private;
 443
 444        seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
 445                   "Max", "Fail");
 446        seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
 447                        dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
 448                        dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
 449        seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 450                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 451                        dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
 452        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 453                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 454                        dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
 455        seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
 456                        dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
 457                        dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
 458        seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
 459                        dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
 460                        dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
 461        seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
 462                        dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
 463                        dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
 464        seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
 465        seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
 466        seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
 467        seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
 468                   db_state_str[dev->db_state],
 469                   dev->rdev.stats.db_state_transitions,
 470                   dev->rdev.stats.db_fc_interruptions);
 471        seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
 472        seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
 473                   dev->rdev.stats.act_ofld_conn_fails);
 474        seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
 475                   dev->rdev.stats.pas_ofld_conn_fails);
 476        seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
 477        seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
 478        return 0;
 479}
 480
 481static int stats_open(struct inode *inode, struct file *file)
 482{
 483        return single_open(file, stats_show, inode->i_private);
 484}
 485
 486static ssize_t stats_clear(struct file *file, const char __user *buf,
 487                size_t count, loff_t *pos)
 488{
 489        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 490
 491        mutex_lock(&dev->rdev.stats.lock);
 492        dev->rdev.stats.pd.max = 0;
 493        dev->rdev.stats.pd.fail = 0;
 494        dev->rdev.stats.qid.max = 0;
 495        dev->rdev.stats.qid.fail = 0;
 496        dev->rdev.stats.stag.max = 0;
 497        dev->rdev.stats.stag.fail = 0;
 498        dev->rdev.stats.pbl.max = 0;
 499        dev->rdev.stats.pbl.fail = 0;
 500        dev->rdev.stats.rqt.max = 0;
 501        dev->rdev.stats.rqt.fail = 0;
 502        dev->rdev.stats.ocqp.max = 0;
 503        dev->rdev.stats.ocqp.fail = 0;
 504        dev->rdev.stats.db_full = 0;
 505        dev->rdev.stats.db_empty = 0;
 506        dev->rdev.stats.db_drop = 0;
 507        dev->rdev.stats.db_state_transitions = 0;
 508        dev->rdev.stats.tcam_full = 0;
 509        dev->rdev.stats.act_ofld_conn_fails = 0;
 510        dev->rdev.stats.pas_ofld_conn_fails = 0;
 511        mutex_unlock(&dev->rdev.stats.lock);
 512        return count;
 513}
 514
 515static const struct file_operations stats_debugfs_fops = {
 516        .owner   = THIS_MODULE,
 517        .open    = stats_open,
 518        .release = single_release,
 519        .read    = seq_read,
 520        .llseek  = seq_lseek,
 521        .write   = stats_clear,
 522};
 523
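/*
 * idr_for_each() callback for the debugfs "eps" file: print one line
 * per connected endpoint with its state, flags, tids, negative-advice
 * counters, and the local/remote (plus port-mapped) addresses.
 */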
 524static int dump_ep(int id, void *p, void *data)
 525{
 526        struct c4iw_ep *ep = p;
 527        struct c4iw_debugfs_data *epd = data;
 528        int space;
 529        int cc;
 530
 531        space = epd->bufsize - epd->pos - 1;
 532        if (space == 0)
 533                return 1;
 534
 535        if (ep->com.local_addr.ss_family == AF_INET) {
 536                struct sockaddr_in *lsin = (struct sockaddr_in *)
 537                        &ep->com.cm_id->local_addr;
 538                struct sockaddr_in *rsin = (struct sockaddr_in *)
 539                        &ep->com.cm_id->remote_addr;
 540                struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 541                        &ep->com.cm_id->m_local_addr;
 542                struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
 543                        &ep->com.cm_id->m_remote_addr;
 544
 545                cc = snprintf(epd->buf + epd->pos, space,
 546                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 547                              "history 0x%lx hwtid %d atid %d "
 548                              "conn_na %u abort_na %u "
 549                              "%pI4:%d/%d <-> %pI4:%d/%d\n",
 550                              ep, ep->com.cm_id, ep->com.qp,
 551                              (int)ep->com.state, ep->com.flags,
 552                              ep->com.history, ep->hwtid, ep->atid,
 553                              ep->stats.connect_neg_adv,
 554                              ep->stats.abort_neg_adv,
 555                              &lsin->sin_addr, ntohs(lsin->sin_port),
 556                              ntohs(mapped_lsin->sin_port),
 557                              &rsin->sin_addr, ntohs(rsin->sin_port),
 558                              ntohs(mapped_rsin->sin_port));
 559        } else {
 560                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 561                        &ep->com.cm_id->local_addr;
 562                struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 563                        &ep->com.cm_id->remote_addr;
 564                struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
 565                        &ep->com.cm_id->m_local_addr;
 566                struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
 567                        &ep->com.cm_id->m_remote_addr;
 568
 569                cc = snprintf(epd->buf + epd->pos, space,
 570                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 571                              "history 0x%lx hwtid %d atid %d "
 572                              "conn_na %u abort_na %u "
 573                              "%pI6:%d/%d <-> %pI6:%d/%d\n",
 574                              ep, ep->com.cm_id, ep->com.qp,
 575                              (int)ep->com.state, ep->com.flags,
 576                              ep->com.history, ep->hwtid, ep->atid,
 577                              ep->stats.connect_neg_adv,
 578                              ep->stats.abort_neg_adv,
 579                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 580                              ntohs(mapped_lsin6->sin6_port),
 581                              &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
 582                              ntohs(mapped_rsin6->sin6_port));
 583        }
 584        if (cc < space)
 585                epd->pos += cc;
 586        return 0;
 587}
 588
 589static int dump_listen_ep(int id, void *p, void *data)
 590{
 591        struct c4iw_listen_ep *ep = p;
 592        struct c4iw_debugfs_data *epd = data;
 593        int space;
 594        int cc;
 595
 596        space = epd->bufsize - epd->pos - 1;
 597        if (space == 0)
 598                return 1;
 599
 600        if (ep->com.local_addr.ss_family == AF_INET) {
 601                struct sockaddr_in *lsin = (struct sockaddr_in *)
 602                        &ep->com.cm_id->local_addr;
 603                struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 604                        &ep->com.cm_id->m_local_addr;
 605
 606                cc = snprintf(epd->buf + epd->pos, space,
 607                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 608                              "backlog %d %pI4:%d/%d\n",
 609                              ep, ep->com.cm_id, (int)ep->com.state,
 610                              ep->com.flags, ep->stid, ep->backlog,
 611                              &lsin->sin_addr, ntohs(lsin->sin_port),
 612                              ntohs(mapped_lsin->sin_port));
 613        } else {
 614                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 615                        &ep->com.cm_id->local_addr;
 616                struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
 617                        &ep->com.cm_id->m_local_addr;
 618
 619                cc = snprintf(epd->buf + epd->pos, space,
 620                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 621                              "backlog %d %pI6:%d/%d\n",
 622                              ep, ep->com.cm_id, (int)ep->com.state,
 623                              ep->com.flags, ep->stid, ep->backlog,
 624                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 625                              ntohs(mapped_lsin6->sin6_port));
 626        }
 627        if (cc < space)
 628                epd->pos += cc;
 629        return 0;
 630}
 631
 632static int ep_release(struct inode *inode, struct file *file)
 633{
 634        struct c4iw_debugfs_data *epd = file->private_data;
 635        if (!epd) {
                pr_info("%s null epd?\n", __func__);
 637                return 0;
 638        }
 639        vfree(epd->buf);
 640        kfree(epd);
 641        return 0;
 642}
 643
 644static int ep_open(struct inode *inode, struct file *file)
 645{
 646        struct c4iw_debugfs_data *epd;
 647        int ret = 0;
 648        int count = 1;
 649
 650        epd = kmalloc(sizeof(*epd), GFP_KERNEL);
 651        if (!epd) {
 652                ret = -ENOMEM;
 653                goto out;
 654        }
 655        epd->devp = inode->i_private;
 656        epd->pos = 0;
 657
 658        spin_lock_irq(&epd->devp->lock);
 659        idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
 660        idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
 661        idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 662        spin_unlock_irq(&epd->devp->lock);
 663
 664        epd->bufsize = count * 240;
 665        epd->buf = vmalloc(epd->bufsize);
 666        if (!epd->buf) {
 667                ret = -ENOMEM;
 668                goto err1;
 669        }
 670
 671        spin_lock_irq(&epd->devp->lock);
 672        idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
 673        idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
 674        idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
 675        spin_unlock_irq(&epd->devp->lock);
 676
 677        file->private_data = epd;
 678        goto out;
 679err1:
 680        kfree(epd);
 681out:
 682        return ret;
 683}
 684
 685static const struct file_operations ep_debugfs_fops = {
 686        .owner   = THIS_MODULE,
 687        .open    = ep_open,
 688        .release = ep_release,
 689        .read    = debugfs_read,
 690};
 691
 692static int setup_debugfs(struct c4iw_dev *devp)
 693{
 694        if (!devp->debugfs_root)
 695                return -1;
 696
 697        debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
 698                                 (void *)devp, &qp_debugfs_fops, 4096);
 699
 700        debugfs_create_file_size("stags", S_IWUSR, devp->debugfs_root,
 701                                 (void *)devp, &stag_debugfs_fops, 4096);
 702
 703        debugfs_create_file_size("stats", S_IWUSR, devp->debugfs_root,
 704                                 (void *)devp, &stats_debugfs_fops, 4096);
 705
 706        debugfs_create_file_size("eps", S_IWUSR, devp->debugfs_root,
 707                                 (void *)devp, &ep_debugfs_fops, 4096);
 708
 709        if (c4iw_wr_log)
 710                debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
 711                                         (void *)devp, &wr_log_debugfs_fops, 4096);
 712        return 0;
 713}
 714
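/*
 * Release the qids cached on a user context: group-aligned qpids are
 * returned to the global qid table, then all cached qpid and cqid
 * entries are freed.
 */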
 715void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 716                               struct c4iw_dev_ucontext *uctx)
 717{
 718        struct list_head *pos, *nxt;
 719        struct c4iw_qid_list *entry;
 720
 721        mutex_lock(&uctx->lock);
 722        list_for_each_safe(pos, nxt, &uctx->qpids) {
 723                entry = list_entry(pos, struct c4iw_qid_list, entry);
 724                list_del_init(&entry->entry);
 725                if (!(entry->qid & rdev->qpmask)) {
 726                        c4iw_put_resource(&rdev->resource.qid_table,
 727                                          entry->qid);
 728                        mutex_lock(&rdev->stats.lock);
 729                        rdev->stats.qid.cur -= rdev->qpmask + 1;
 730                        mutex_unlock(&rdev->stats.lock);
 731                }
 732                kfree(entry);
 733        }
 734
        list_for_each_safe(pos, nxt, &uctx->cqids) {
 736                entry = list_entry(pos, struct c4iw_qid_list, entry);
 737                list_del_init(&entry->entry);
 738                kfree(entry);
 739        }
 740        mutex_unlock(&uctx->lock);
 741}
 742
 743void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 744                            struct c4iw_dev_ucontext *uctx)
 745{
 746        INIT_LIST_HEAD(&uctx->qpids);
 747        INIT_LIST_HEAD(&uctx->cqids);
 748        mutex_init(&uctx->lock);
 749}
 750
 751/* Caller takes care of locking if needed */
 752static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 753{
 754        int err;
 755
 756        c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 757
 758        /*
 759         * This implementation assumes udb_density == ucq_density!  Eventually
 760         * we might need to support this but for now fail the open. Also the
 761         * cqid and qpid range must match for now.
 762         */
 763        if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
 764                pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
 765                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
 766                       rdev->lldi.ucq_density);
 767                return -EINVAL;
 768        }
 769        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
 770            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
 771                pr_err(MOD "%s: unsupported qp and cq id ranges "
 772                       "qp start %u size %u cq start %u size %u\n",
 773                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
 775                       rdev->lldi.vr->cq.size);
 776                return -EINVAL;
 777        }
 778
 779        rdev->qpmask = rdev->lldi.udb_density - 1;
 780        rdev->cqmask = rdev->lldi.ucq_density - 1;
 781        PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
 782             "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
 783             "qp qid start %u size %u cq qid start %u size %u\n",
 784             __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 785             rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 786             rdev->lldi.vr->pbl.start,
 787             rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
 788             rdev->lldi.vr->rq.size,
 789             rdev->lldi.vr->qp.start,
 790             rdev->lldi.vr->qp.size,
 791             rdev->lldi.vr->cq.start,
 792             rdev->lldi.vr->cq.size);
 793        PDBG("udb %pR db_reg %p gts_reg %p "
 794             "qpmask 0x%x cqmask 0x%x\n",
 795                &rdev->lldi.pdev->resource[2],
 796             rdev->lldi.db_reg, rdev->lldi.gts_reg,
 797             rdev->qpmask, rdev->cqmask);
 798
 799        if (c4iw_num_stags(rdev) == 0)
 800                return -EINVAL;
 801
 802        rdev->stats.pd.total = T4_MAX_NUM_PD;
 803        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 804        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 805        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
 806        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 807        rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 808
 809        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
 810        if (err) {
 811                printk(KERN_ERR MOD "error %d initializing resources\n", err);
 812                return err;
 813        }
 814        err = c4iw_pblpool_create(rdev);
 815        if (err) {
 816                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
 817                goto destroy_resource;
 818        }
 819        err = c4iw_rqtpool_create(rdev);
 820        if (err) {
 821                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
 822                goto destroy_pblpool;
 823        }
 824        err = c4iw_ocqp_pool_create(rdev);
 825        if (err) {
 826                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
 827                goto destroy_rqtpool;
 828        }
 829        rdev->status_page = (struct t4_dev_status_page *)
 830                            __get_free_page(GFP_KERNEL);
        if (!rdev->status_page) {
                err = -ENOMEM;
                goto destroy_ocqp_pool;
        }
 833        rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
 834        rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
 835        rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
 836        rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
 837
 838        if (c4iw_wr_log) {
                rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order,
                                       sizeof(*rdev->wr_log), GFP_KERNEL);
 841                if (rdev->wr_log) {
 842                        rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
 843                        atomic_set(&rdev->wr_log_idx, 0);
 844                } else {
 845                        pr_err(MOD "error allocating wr_log. Logging disabled\n");
 846                }
 847        }
 848
 849        rdev->status_page->db_off = 0;
 850
 851        return 0;
 852destroy_ocqp_pool:
 853        c4iw_ocqp_pool_destroy(rdev);
 854destroy_rqtpool:
 855        c4iw_rqtpool_destroy(rdev);
 856destroy_pblpool:
 857        c4iw_pblpool_destroy(rdev);
 858destroy_resource:
 859        c4iw_destroy_resource(&rdev->resource);
 860        return err;
 861}
 862
 863static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 864{
 865        kfree(rdev->wr_log);
 866        free_page((unsigned long)rdev->status_page);
 867        c4iw_pblpool_destroy(rdev);
 868        c4iw_rqtpool_destroy(rdev);
 869        c4iw_destroy_resource(&rdev->resource);
 870}
 871
 872static void c4iw_dealloc(struct uld_ctx *ctx)
 873{
 874        c4iw_rdev_close(&ctx->dev->rdev);
 875        WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
 876        idr_destroy(&ctx->dev->cqidr);
 877        WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
 878        idr_destroy(&ctx->dev->qpidr);
 879        WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
 880        idr_destroy(&ctx->dev->mmidr);
 881        wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
 882        idr_destroy(&ctx->dev->hwtid_idr);
 883        idr_destroy(&ctx->dev->stid_idr);
 884        idr_destroy(&ctx->dev->atid_idr);
 885        if (ctx->dev->rdev.bar2_kva)
 886                iounmap(ctx->dev->rdev.bar2_kva);
 887        if (ctx->dev->rdev.oc_mw_kva)
 888                iounmap(ctx->dev->rdev.oc_mw_kva);
 889        ib_dealloc_device(&ctx->dev->ibdev);
 890        ctx->dev = NULL;
 891}
 892
 893static void c4iw_remove(struct uld_ctx *ctx)
 894{
 895        PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
 896        c4iw_unregister_device(ctx->dev);
 897        c4iw_dealloc(ctx);
 898}
 899
 900static int rdma_supported(const struct cxgb4_lld_info *infop)
 901{
 902        return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
 903               infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
 904               infop->vr->cq.size > 0;
 905}
 906
 907static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 908{
 909        struct c4iw_dev *devp;
 910        int ret;
 911
 912        if (!rdma_supported(infop)) {
 913                printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
 914                       pci_name(infop->pdev));
 915                return ERR_PTR(-ENOSYS);
 916        }
 917        if (!ocqp_supported(infop))
 918                pr_info("%s: On-Chip Queues not supported on this device.\n",
 919                        pci_name(infop->pdev));
 920
 921        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 922        if (!devp) {
 923                printk(KERN_ERR MOD "Cannot allocate ib device\n");
 924                return ERR_PTR(-ENOMEM);
 925        }
 926        devp->rdev.lldi = *infop;
 927
 928        /* init various hw-queue params based on lld info */
 929        PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
 930             __func__, devp->rdev.lldi.sge_ingpadboundary,
 931             devp->rdev.lldi.sge_egrstatuspagesize);
 932
 933        devp->rdev.hw_queue.t4_eq_status_entries =
 934                devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
 935        devp->rdev.hw_queue.t4_max_eq_size = 65520;
 936        devp->rdev.hw_queue.t4_max_iq_size = 65520;
 937        devp->rdev.hw_queue.t4_max_rq_size = 8192 -
 938                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 939        devp->rdev.hw_queue.t4_max_sq_size =
 940                devp->rdev.hw_queue.t4_max_eq_size -
 941                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 942        devp->rdev.hw_queue.t4_max_qp_depth =
 943                devp->rdev.hw_queue.t4_max_rq_size;
 944        devp->rdev.hw_queue.t4_max_cq_depth =
 945                devp->rdev.hw_queue.t4_max_iq_size - 2;
 946        devp->rdev.hw_queue.t4_stat_len =
 947                devp->rdev.lldi.sge_egrstatuspagesize;
 948
 949        /*
 950         * For T5/T6 devices, we map all of BAR2 with WC.
 951         * For T4 devices with onchip qp mem, we map only that part
 952         * of BAR2 with WC.
 953         */
 954        devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
 955        if (!is_t4(devp->rdev.lldi.adapter_type)) {
 956                devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
 957                        pci_resource_len(devp->rdev.lldi.pdev, 2));
 958                if (!devp->rdev.bar2_kva) {
 959                        pr_err(MOD "Unable to ioremap BAR2\n");
 960                        ib_dealloc_device(&devp->ibdev);
 961                        return ERR_PTR(-EINVAL);
 962                }
 963        } else if (ocqp_supported(infop)) {
 964                devp->rdev.oc_mw_pa =
 965                        pci_resource_start(devp->rdev.lldi.pdev, 2) +
 966                        pci_resource_len(devp->rdev.lldi.pdev, 2) -
 967                        roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
 968                devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
 969                        devp->rdev.lldi.vr->ocq.size);
 970                if (!devp->rdev.oc_mw_kva) {
 971                        pr_err(MOD "Unable to ioremap onchip mem\n");
 972                        ib_dealloc_device(&devp->ibdev);
 973                        return ERR_PTR(-EINVAL);
 974                }
 975        }
 976
 977        PDBG(KERN_INFO MOD "ocq memory: "
 978               "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
 979               devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
 980               devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
 981
 982        ret = c4iw_rdev_open(&devp->rdev);
 983        if (ret) {
 984                printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
 985                ib_dealloc_device(&devp->ibdev);
 986                return ERR_PTR(ret);
 987        }
 988
 989        idr_init(&devp->cqidr);
 990        idr_init(&devp->qpidr);
 991        idr_init(&devp->mmidr);
 992        idr_init(&devp->hwtid_idr);
 993        idr_init(&devp->stid_idr);
 994        idr_init(&devp->atid_idr);
 995        spin_lock_init(&devp->lock);
 996        mutex_init(&devp->rdev.stats.lock);
 997        mutex_init(&devp->db_mutex);
 998        INIT_LIST_HEAD(&devp->db_fc_list);
 999        init_waitqueue_head(&devp->wait);
1000        devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
1001
1002        if (c4iw_debugfs_root) {
1003                devp->debugfs_root = debugfs_create_dir(
1004                                        pci_name(devp->rdev.lldi.pdev),
1005                                        c4iw_debugfs_root);
1006                setup_debugfs(devp);
1007        }
1008
1009
1010        return devp;
1011}
1012
1013static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
1014{
1015        struct uld_ctx *ctx;
1016        static int vers_printed;
1017        int i;
1018
1019        if (!vers_printed++)
1020                pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
1021                        DRV_VERSION);
1022
1023        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
1024        if (!ctx) {
1025                ctx = ERR_PTR(-ENOMEM);
1026                goto out;
1027        }
1028        ctx->lldi = *infop;
1029
1030        PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
1031             __func__, pci_name(ctx->lldi.pdev),
1032             ctx->lldi.nchan, ctx->lldi.nrxq,
1033             ctx->lldi.ntxq, ctx->lldi.nports);
1034
1035        mutex_lock(&dev_mutex);
1036        list_add_tail(&ctx->entry, &uld_ctx_list);
1037        mutex_unlock(&dev_mutex);
1038
1039        for (i = 0; i < ctx->lldi.nrxq; i++)
1040                PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
1041out:
1042        return ctx;
1043}
1044
1045static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
1046                                                 const __be64 *rsp,
1047                                                 u32 pktshift)
1048{
1049        struct sk_buff *skb;
1050
1051        /*
1052         * Allocate space for cpl_pass_accept_req which will be synthesized by
1053         * driver. Once the driver synthesizes the request the skb will go
1054         * through the regular cpl_pass_accept_req processing.
1055         * The math here assumes sizeof cpl_pass_accept_req >= sizeof
1056         * cpl_rx_pkt.
1057         */
1058        skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1059                        sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
1060        if (unlikely(!skb))
1061                return NULL;
1062
1063         __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1064                   sizeof(struct rss_header) - pktshift);
1065
1066        /*
1067         * This skb will contain:
1068         *   rss_header from the rspq descriptor (1 flit)
1069         *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
1070         *   space for the difference between the size of an
1071         *      rx_pkt and pass_accept_req cpl (1 flit)
1072         *   the packet data from the gl
1073         */
1074        skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
1075                                sizeof(struct rss_header));
1076        skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
1077                                       sizeof(struct cpl_pass_accept_req),
1078                                       gl->va + pktshift,
1079                                       gl->tot_len - pktshift);
1080        return skb;
1081}
1082
1083static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
1084                           const __be64 *rsp)
1085{
1086        unsigned int opcode = *(u8 *)rsp;
1087        struct sk_buff *skb;
1088
1089        if (opcode != CPL_RX_PKT)
1090                goto out;
1091
        skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
1093        if (skb == NULL)
1094                goto out;
1095
1096        if (c4iw_handlers[opcode] == NULL) {
1097                pr_info("%s no handler opcode 0x%x...\n", __func__,
1098                       opcode);
1099                kfree_skb(skb);
1100                goto out;
1101        }
1102        c4iw_handlers[opcode](dev, skb);
1103        return 1;
1104out:
1105        return 0;
1106}
1107
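/*
 * ULD ingress handler: responses without free-list data are copied
 * from the response descriptor into an skb, async notifications go to
 * c4iw_ev_handler(), CPL_RX_PKT ingress packets are routed through
 * recv_rx_pkt(), and everything else is dispatched via the per-opcode
 * c4iw_handlers[] table.
 */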
1108static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
1109                        const struct pkt_gl *gl)
1110{
1111        struct uld_ctx *ctx = handle;
1112        struct c4iw_dev *dev = ctx->dev;
1113        struct sk_buff *skb;
1114        u8 opcode;
1115
1116        if (gl == NULL) {
1117                /* omit RSS and rsp_ctrl at end of descriptor */
1118                unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
1119
1120                skb = alloc_skb(256, GFP_ATOMIC);
1121                if (!skb)
1122                        goto nomem;
1123                __skb_put(skb, len);
1124                skb_copy_to_linear_data(skb, &rsp[1], len);
1125        } else if (gl == CXGB4_MSG_AN) {
1126                const struct rsp_ctrl *rc = (void *)rsp;
1127
1128                u32 qid = be32_to_cpu(rc->pldbuflen_qid);
1129                c4iw_ev_handler(dev, qid);
1130                return 0;
1131        } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
1132                if (recv_rx_pkt(dev, gl, rsp))
1133                        return 0;
1134
1135                pr_info("%s: unexpected FL contents at %p, " \
1136                       "RSS %#llx, FL %#llx, len %u\n",
1137                       pci_name(ctx->lldi.pdev), gl->va,
1138                       (unsigned long long)be64_to_cpu(*rsp),
1139                       (unsigned long long)be64_to_cpu(
1140                       *(__force __be64 *)gl->va),
1141                       gl->tot_len);
1142
1143                return 0;
1144        } else {
1145                skb = cxgb4_pktgl_to_skb(gl, 128, 128);
1146                if (unlikely(!skb))
1147                        goto nomem;
1148        }
1149
1150        opcode = *(u8 *)rsp;
1151        if (c4iw_handlers[opcode]) {
1152                c4iw_handlers[opcode](dev, skb);
1153        } else {
1154                pr_info("%s no handler opcode 0x%x...\n", __func__,
1155                       opcode);
1156                kfree_skb(skb);
1157        }
1158
1159        return 0;
1160nomem:
1161        return -1;
1162}
1163
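/*
 * LLD state-change callback: allocate and register the RDMA device
 * when the adapter comes up, and tear it down on DOWN/DETACH or on a
 * fatal error (after dispatching IB_EVENT_DEVICE_FATAL to consumers).
 */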
1164static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
1165{
1166        struct uld_ctx *ctx = handle;
1167
1168        PDBG("%s new_state %u\n", __func__, new_state);
1169        switch (new_state) {
1170        case CXGB4_STATE_UP:
1171                printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
1172                if (!ctx->dev) {
1173                        int ret;
1174
1175                        ctx->dev = c4iw_alloc(&ctx->lldi);
1176                        if (IS_ERR(ctx->dev)) {
1177                                printk(KERN_ERR MOD
1178                                       "%s: initialization failed: %ld\n",
1179                                       pci_name(ctx->lldi.pdev),
1180                                       PTR_ERR(ctx->dev));
1181                                ctx->dev = NULL;
1182                                break;
1183                        }
1184                        ret = c4iw_register_device(ctx->dev);
1185                        if (ret) {
1186                                printk(KERN_ERR MOD
1187                                       "%s: RDMA registration failed: %d\n",
1188                                       pci_name(ctx->lldi.pdev), ret);
1189                                c4iw_dealloc(ctx);
1190                        }
1191                }
1192                break;
1193        case CXGB4_STATE_DOWN:
1194                printk(KERN_INFO MOD "%s: Down\n",
1195                       pci_name(ctx->lldi.pdev));
1196                if (ctx->dev)
1197                        c4iw_remove(ctx);
1198                break;
1199        case CXGB4_STATE_START_RECOVERY:
1200                printk(KERN_INFO MOD "%s: Fatal Error\n",
1201                       pci_name(ctx->lldi.pdev));
1202                if (ctx->dev) {
1203                        struct ib_event event;
1204
1205                        ctx->dev->rdev.flags |= T4_FATAL_ERROR;
1206                        memset(&event, 0, sizeof event);
1207                        event.event  = IB_EVENT_DEVICE_FATAL;
1208                        event.device = &ctx->dev->ibdev;
1209                        ib_dispatch_event(&event);
1210                        c4iw_remove(ctx);
1211                }
1212                break;
1213        case CXGB4_STATE_DETACH:
1214                printk(KERN_INFO MOD "%s: Detach\n",
1215                       pci_name(ctx->lldi.pdev));
1216                if (ctx->dev)
1217                        c4iw_remove(ctx);
1218                break;
1219        }
1220        return 0;
1221}
1222
1223static int disable_qp_db(int id, void *p, void *data)
1224{
1225        struct c4iw_qp *qp = p;
1226
1227        t4_disable_wq_db(&qp->wq);
1228        return 0;
1229}
1230
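/*
 * Doorbell flow control: mark the device STOPPED and stop doorbell
 * rings, either by setting the status-page db_off flag (seen by user
 * mode) or, if the status page is disabled, by disabling the WQ
 * doorbells of every QP.
 */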
1231static void stop_queues(struct uld_ctx *ctx)
1232{
1233        unsigned long flags;
1234
1235        spin_lock_irqsave(&ctx->dev->lock, flags);
1236        ctx->dev->rdev.stats.db_state_transitions++;
1237        ctx->dev->db_state = STOPPED;
1238        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
1239                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
1240        else
1241                ctx->dev->rdev.status_page->db_off = 1;
1242        spin_unlock_irqrestore(&ctx->dev->lock, flags);
1243}
1244
1245static int enable_qp_db(int id, void *p, void *data)
1246{
1247        struct c4iw_qp *qp = p;
1248
1249        t4_enable_wq_db(&qp->wq);
1250        return 0;
1251}
1252
1253static void resume_rc_qp(struct c4iw_qp *qp)
1254{
1255        spin_lock(&qp->lock);
1256        t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
1257        qp->wq.sq.wq_pidx_inc = 0;
1258        t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
1259        qp->wq.rq.wq_pidx_inc = 0;
1260        spin_unlock(&qp->lock);
1261}
1262
1263static void resume_a_chunk(struct uld_ctx *ctx)
1264{
1265        int i;
1266        struct c4iw_qp *qp;
1267
1268        for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
1269                qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
1270                                      db_fc_entry);
1271                list_del_init(&qp->db_fc_entry);
1272                resume_rc_qp(qp);
1273                if (list_empty(&ctx->dev->db_fc_list))
1274                        break;
1275        }
1276}
1277
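/*
 * Leave the STOPPED state: ring the deferred doorbells of QPs on the
 * db_fc_list in small chunks while the hardware doorbell FIFO has
 * room, then return to NORMAL and re-enable doorbell rings.
 */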
1278static void resume_queues(struct uld_ctx *ctx)
1279{
1280        spin_lock_irq(&ctx->dev->lock);
1281        if (ctx->dev->db_state != STOPPED)
1282                goto out;
1283        ctx->dev->db_state = FLOW_CONTROL;
1284        while (1) {
1285                if (list_empty(&ctx->dev->db_fc_list)) {
1286                        WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
1287                        ctx->dev->db_state = NORMAL;
1288                        ctx->dev->rdev.stats.db_state_transitions++;
1289                        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
1290                                idr_for_each(&ctx->dev->qpidr, enable_qp_db,
1291                                             NULL);
1292                        } else {
1293                                ctx->dev->rdev.status_page->db_off = 0;
1294                        }
1295                        break;
1296                } else {
1297                        if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
1298                            < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
1299                               DB_FC_DRAIN_THRESH)) {
1300                                resume_a_chunk(ctx);
1301                        }
1302                        if (!list_empty(&ctx->dev->db_fc_list)) {
1303                                spin_unlock_irq(&ctx->dev->lock);
1304                                if (DB_FC_RESUME_DELAY) {
1305                                        set_current_state(TASK_UNINTERRUPTIBLE);
1306                                        schedule_timeout(DB_FC_RESUME_DELAY);
1307                                }
1308                                spin_lock_irq(&ctx->dev->lock);
1309                                if (ctx->dev->db_state != FLOW_CONTROL)
1310                                        break;
1311                        }
1312                }
1313        }
1314out:
1315        if (ctx->dev->db_state != NORMAL)
1316                ctx->dev->rdev.stats.db_fc_interruptions++;
1317        spin_unlock_irq(&ctx->dev->lock);
1318}
1319
1320struct qp_list {
1321        unsigned idx;
1322        struct c4iw_qp **qps;
1323};
1324
1325static int add_and_ref_qp(int id, void *p, void *data)
1326{
1327        struct qp_list *qp_listp = data;
1328        struct c4iw_qp *qp = p;
1329
1330        c4iw_qp_add_ref(&qp->ibqp);
1331        qp_listp->qps[qp_listp->idx++] = qp;
1332        return 0;
1333}
1334
1335static int count_qps(int id, void *p, void *data)
1336{
1337        unsigned *countp = data;
1338        (*countp)++;
1339        return 0;
1340}
1341
1342static void deref_qps(struct qp_list *qp_list)
1343{
1344        int idx;
1345
1346        for (idx = 0; idx < qp_list->idx; idx++)
1347                c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
1348}
1349
1350static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
1351{
1352        int idx;
1353        int ret;
1354
1355        for (idx = 0; idx < qp_list->idx; idx++) {
1356                struct c4iw_qp *qp = qp_list->qps[idx];
1357
1358                spin_lock_irq(&qp->rhp->lock);
1359                spin_lock(&qp->lock);
1360                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1361                                          qp->wq.sq.qid,
1362                                          t4_sq_host_wq_pidx(&qp->wq),
1363                                          t4_sq_wq_size(&qp->wq));
1364                if (ret) {
1365                        pr_err(MOD "%s: Fatal error - "
1366                               "DB overflow recovery failed - "
1367                               "error syncing SQ qid %u\n",
1368                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
1369                        spin_unlock(&qp->lock);
1370                        spin_unlock_irq(&qp->rhp->lock);
1371                        return;
1372                }
1373                qp->wq.sq.wq_pidx_inc = 0;
1374
1375                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1376                                          qp->wq.rq.qid,
1377                                          t4_rq_host_wq_pidx(&qp->wq),
1378                                          t4_rq_wq_size(&qp->wq));
1379
1380                if (ret) {
1381                        pr_err(MOD "%s: Fatal error - "
1382                               "DB overflow recovery failed - "
1383                               "error syncing RQ qid %u\n",
1384                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
1385                        spin_unlock(&qp->lock);
1386                        spin_unlock_irq(&qp->rhp->lock);
1387                        return;
1388                }
1389                qp->wq.rq.wq_pidx_inc = 0;
1390                spin_unlock(&qp->lock);
1391                spin_unlock_irq(&qp->rhp->lock);
1392
1393                /* Wait for the dbfifo to drain */
1394                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
1395                        set_current_state(TASK_UNINTERRUPTIBLE);
1396                        schedule_timeout(usecs_to_jiffies(10));
1397                }
1398        }
1399}
1400
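/*
 * Doorbell drop recovery: flush the SGE EQ context cache, take a
 * reference on every QP, and resync each SQ/RQ host producer index
 * into hardware with cxgb4_sync_txq_pidx() while in RECOVERY state.
 */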
1401static void recover_queues(struct uld_ctx *ctx)
1402{
1403        int count = 0;
1404        struct qp_list qp_list;
1405        int ret;
1406
1407        /* slow everybody down */
1408        set_current_state(TASK_UNINTERRUPTIBLE);
1409        schedule_timeout(usecs_to_jiffies(1000));
1410
1411        /* flush the SGE contexts */
1412        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
1413        if (ret) {
1414                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1415                       pci_name(ctx->lldi.pdev));
1416                return;
1417        }
1418
1419        /* Count active queues so we can build a list of queues to recover */
1420        spin_lock_irq(&ctx->dev->lock);
1421        WARN_ON(ctx->dev->db_state != STOPPED);
1422        ctx->dev->db_state = RECOVERY;
1423        idr_for_each(&ctx->dev->qpidr, count_qps, &count);
1424
1425        qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
1426        if (!qp_list.qps) {
1427                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1428                       pci_name(ctx->lldi.pdev));
1429                spin_unlock_irq(&ctx->dev->lock);
1430                return;
1431        }
1432        qp_list.idx = 0;
1433
1434        /* add and ref each qp so it doesn't get freed */
1435        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
1436
1437        spin_unlock_irq(&ctx->dev->lock);
1438
1439        /* now traverse the list in a safe context to recover the db state*/
1440        recover_lost_dbs(ctx, &qp_list);
1441
1442        /* we're almost done!  deref the qps and clean up */
1443        deref_qps(&qp_list);
1444        kfree(qp_list.qps);
1445
1446        spin_lock_irq(&ctx->dev->lock);
1447        WARN_ON(ctx->dev->db_state != RECOVERY);
1448        ctx->dev->db_state = STOPPED;
1449        spin_unlock_irq(&ctx->dev->lock);
1450}
1451
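/*
 * LLD control callback for doorbell events: stop queues when the
 * doorbell FIFO fills, resume them when it empties, and run full
 * recovery when the hardware reports dropped doorbells.
 */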
1452static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
1453{
1454        struct uld_ctx *ctx = handle;
1455
1456        switch (control) {
1457        case CXGB4_CONTROL_DB_FULL:
1458                stop_queues(ctx);
1459                ctx->dev->rdev.stats.db_full++;
1460                break;
1461        case CXGB4_CONTROL_DB_EMPTY:
1462                resume_queues(ctx);
1463                mutex_lock(&ctx->dev->rdev.stats.lock);
1464                ctx->dev->rdev.stats.db_empty++;
1465                mutex_unlock(&ctx->dev->rdev.stats.lock);
1466                break;
1467        case CXGB4_CONTROL_DB_DROP:
1468                recover_queues(ctx);
1469                mutex_lock(&ctx->dev->rdev.stats.lock);
1470                ctx->dev->rdev.stats.db_drop++;
1471                mutex_unlock(&ctx->dev->rdev.stats.lock);
1472                break;
1473        default:
1474                printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
1475                       pci_name(ctx->lldi.pdev), control);
1476                break;
1477        }
1478        return 0;
1479}
1480
1481static struct cxgb4_uld_info c4iw_uld_info = {
1482        .name = DRV_NAME,
1483        .nrxq = MAX_ULD_QSETS,
1484        .rxq_size = 511,
1485        .ciq = true,
1486        .lro = false,
1487        .add = c4iw_uld_add,
1488        .rx_handler = c4iw_uld_rx_handler,
1489        .state_change = c4iw_uld_state_change,
1490        .control = c4iw_uld_control,
1491};
1492
1493static int __init c4iw_init_module(void)
1494{
1495        int err;
1496
1497        err = c4iw_cm_init();
1498        if (err)
1499                return err;
1500
1501        c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
1502        if (!c4iw_debugfs_root)
1503                printk(KERN_WARNING MOD
1504                       "could not create debugfs entry, continuing\n");
1505
1506        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
1507
1508        return 0;
1509}
1510
1511static void __exit c4iw_exit_module(void)
1512{
1513        struct uld_ctx *ctx, *tmp;
1514
1515        mutex_lock(&dev_mutex);
1516        list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
1517                if (ctx->dev)
1518                        c4iw_remove(ctx);
1519                kfree(ctx);
1520        }
1521        mutex_unlock(&dev_mutex);
1522        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
1523        c4iw_cm_term();
1524        debugfs_remove_recursive(c4iw_debugfs_root);
1525}
1526
1527module_init(c4iw_init_module);
1528module_exit(c4iw_exit_module);
1529