linux/drivers/infiniband/hw/cxgb4/device.c
   1/*
   2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/moduleparam.h>
  34#include <linux/debugfs.h>
  35#include <linux/vmalloc.h>
  36#include <linux/math64.h>
  37
  38#include <rdma/ib_verbs.h>
  39
  40#include "iw_cxgb4.h"
  41
  42#define DRV_VERSION "0.1"
  43
  44MODULE_AUTHOR("Steve Wise");
  45MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
  46MODULE_LICENSE("Dual BSD/GPL");
  47
  48static int allow_db_fc_on_t5;
  49module_param(allow_db_fc_on_t5, int, 0644);
  50MODULE_PARM_DESC(allow_db_fc_on_t5,
  51                 "Allow DB Flow Control on T5 (default = 0)");
  52
  53static int allow_db_coalescing_on_t5;
  54module_param(allow_db_coalescing_on_t5, int, 0644);
  55MODULE_PARM_DESC(allow_db_coalescing_on_t5,
  56                 "Allow DB Coalescing on T5 (default = 0)");
  57
  58int c4iw_wr_log = 0;
  59module_param(c4iw_wr_log, int, 0444);
  60MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
  61
  62static int c4iw_wr_log_size_order = 12;
  63module_param(c4iw_wr_log_size_order, int, 0444);
  64MODULE_PARM_DESC(c4iw_wr_log_size_order,
  65                 "Number of entries (log2) in the work request timing log.");
  66
  67static LIST_HEAD(uld_ctx_list);
  68static DEFINE_MUTEX(dev_mutex);
  69static struct workqueue_struct *reg_workq;
  70
  71#define DB_FC_RESUME_SIZE 64
  72#define DB_FC_RESUME_DELAY 1
  73#define DB_FC_DRAIN_THRESH 0
  74
  75static struct dentry *c4iw_debugfs_root;
  76
  77struct c4iw_debugfs_data {
  78        struct c4iw_dev *devp;
  79        char *buf;
  80        int bufsize;
  81        int pos;
  82};
  83
  84static int count_idrs(int id, void *p, void *data)
  85{
  86        int *countp = data;
  87
  88        *countp = *countp + 1;
  89        return 0;
  90}
  91
  92static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
  93                            loff_t *ppos)
  94{
  95        struct c4iw_debugfs_data *d = file->private_data;
  96
  97        return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
  98}
  99
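     /*
      * Record one work request timing sample in the circular wr_log buffer:
      * capture the current SGE timestamp and host time, plus the post-time
      * stamps saved in the software SQ/RQ entry that this CQE completes.
      * The slot is claimed by atomically advancing wr_log_idx, so concurrent
      * pollers do not overwrite each other's entries.
      */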
 100void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
 101{
 102        struct wr_log_entry le;
 103        int idx;
 104
 105        if (!wq->rdev->wr_log)
 106                return;
 107
 108        idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
 109                (wq->rdev->wr_log_size - 1);
 110        le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
 111        le.poll_host_time = ktime_get();
 112        le.valid = 1;
 113        le.cqe_sge_ts = CQE_TS(cqe);
 114        if (SQ_TYPE(cqe)) {
 115                le.qid = wq->sq.qid;
 116                le.opcode = CQE_OPCODE(cqe);
 117                le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time;
 118                le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
 119                le.wr_id = CQE_WRID_SQ_IDX(cqe);
 120        } else {
 121                le.qid = wq->rq.qid;
 122                le.opcode = FW_RI_RECEIVE;
 123                le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time;
 124                le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
 125                le.wr_id = CQE_WRID_MSN(cqe);
 126        }
 127        wq->rdev->wr_log[idx] = le;
 128}
 129
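     /*
      * debugfs "wr_log" show handler: walk the circular log from the oldest
      * entry onward and, for each valid sample, print the host-time delta
      * from the previous sample, the raw SGE timestamps, and the post->poll
      * and cqe->poll deltas converted to nanoseconds via the core clock
      * period (cclk_ps).
      */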
 130static int wr_log_show(struct seq_file *seq, void *v)
 131{
 132        struct c4iw_dev *dev = seq->private;
 133        ktime_t prev_time;
 134        struct wr_log_entry *lep;
 135        int prev_time_set = 0;
 136        int idx, end;
 137
 138#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
 139
 140        idx = atomic_read(&dev->rdev.wr_log_idx) &
 141                (dev->rdev.wr_log_size - 1);
 142        end = idx - 1;
 143        if (end < 0)
 144                end = dev->rdev.wr_log_size - 1;
 145        lep = &dev->rdev.wr_log[idx];
 146        while (idx != end) {
 147                if (lep->valid) {
 148                        if (!prev_time_set) {
 149                                prev_time_set = 1;
 150                                prev_time = lep->poll_host_time;
 151                        }
 152                        seq_printf(seq, "%04u: nsec %llu qid %u opcode "
 153                                   "%u %s 0x%x host_wr_delta nsec %llu "
 154                                   "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
 155                                   "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
 156                                   "cqe_poll_delta_ns %llu\n",
 157                                   idx,
 158                                   ktime_to_ns(ktime_sub(lep->poll_host_time,
 159                                                         prev_time)),
 160                                   lep->qid, lep->opcode,
 161                                   lep->opcode == FW_RI_RECEIVE ?
 162                                                        "msn" : "wrid",
 163                                   lep->wr_id,
 164                                   ktime_to_ns(ktime_sub(lep->poll_host_time,
 165                                                         lep->post_host_time)),
 166                                   lep->post_sge_ts, lep->cqe_sge_ts,
 167                                   lep->poll_sge_ts,
 168                                   ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
 169                                   ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
 170                        prev_time = lep->poll_host_time;
 171                }
 172                idx++;
 173                if (idx > (dev->rdev.wr_log_size - 1))
 174                        idx = 0;
 175                lep = &dev->rdev.wr_log[idx];
 176        }
 177#undef ts2ns
 178        return 0;
 179}
 180
 181static int wr_log_open(struct inode *inode, struct file *file)
 182{
 183        return single_open(file, wr_log_show, inode->i_private);
 184}
 185
 186static ssize_t wr_log_clear(struct file *file, const char __user *buf,
 187                            size_t count, loff_t *pos)
 188{
 189        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 190        int i;
 191
 192        if (dev->rdev.wr_log)
 193                for (i = 0; i < dev->rdev.wr_log_size; i++)
 194                        dev->rdev.wr_log[i].valid = 0;
 195        return count;
 196}
 197
 198static const struct file_operations wr_log_debugfs_fops = {
 199        .owner   = THIS_MODULE,
 200        .open    = wr_log_open,
 201        .release = single_release,
 202        .read    = seq_read,
 203        .llseek  = seq_lseek,
 204        .write   = wr_log_clear,
 205};
 206
 207static struct sockaddr_in zero_sin = {
 208        .sin_family = AF_INET,
 209};
 210
 211static struct sockaddr_in6 zero_sin6 = {
 212        .sin6_family = AF_INET6,
 213};
 214
 215static void set_ep_sin_addrs(struct c4iw_ep *ep,
 216                             struct sockaddr_in **lsin,
 217                             struct sockaddr_in **rsin,
 218                             struct sockaddr_in **m_lsin,
 219                             struct sockaddr_in **m_rsin)
 220{
 221        struct iw_cm_id *id = ep->com.cm_id;
 222
 223        *lsin = (struct sockaddr_in *)&ep->com.local_addr;
 224        *rsin = (struct sockaddr_in *)&ep->com.remote_addr;
 225        if (id) {
 226                *m_lsin = (struct sockaddr_in *)&id->m_local_addr;
 227                *m_rsin = (struct sockaddr_in *)&id->m_remote_addr;
 228        } else {
 229                *m_lsin = &zero_sin;
 230                *m_rsin = &zero_sin;
 231        }
 232}
 233
 234static void set_ep_sin6_addrs(struct c4iw_ep *ep,
 235                              struct sockaddr_in6 **lsin6,
 236                              struct sockaddr_in6 **rsin6,
 237                              struct sockaddr_in6 **m_lsin6,
 238                              struct sockaddr_in6 **m_rsin6)
 239{
 240        struct iw_cm_id *id = ep->com.cm_id;
 241
 242        *lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
 243        *rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
 244        if (id) {
 245                *m_lsin6 = (struct sockaddr_in6 *)&id->m_local_addr;
 246                *m_rsin6 = (struct sockaddr_in6 *)&id->m_remote_addr;
 247        } else {
 248                *m_lsin6 = &zero_sin6;
 249                *m_rsin6 = &zero_sin6;
 250        }
 251}
 252
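     /*
      * idr_for_each() callback behind the "qps" debugfs file: format one
      * QP's state into the output buffer, including the endpoint tid/state
      * and the local/remote (and mapped) addresses when the QP has a
      * connected ep.  Returns 1 to stop iterating once the buffer is full.
      */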
 253static int dump_qp(int id, void *p, void *data)
 254{
 255        struct c4iw_qp *qp = p;
 256        struct c4iw_debugfs_data *qpd = data;
 257        int space;
 258        int cc;
 259
 260        if (id != qp->wq.sq.qid)
 261                return 0;
 262
 263        space = qpd->bufsize - qpd->pos - 1;
 264        if (space == 0)
 265                return 1;
 266
 267        if (qp->ep) {
 268                struct c4iw_ep *ep = qp->ep;
 269
 270                if (ep->com.local_addr.ss_family == AF_INET) {
 271                        struct sockaddr_in *lsin;
 272                        struct sockaddr_in *rsin;
 273                        struct sockaddr_in *m_lsin;
 274                        struct sockaddr_in *m_rsin;
 275
 276                        set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
 277                        cc = snprintf(qpd->buf + qpd->pos, space,
 278                                      "rc qp sq id %u rq id %u state %u "
 279                                      "onchip %u ep tid %u state %u "
 280                                      "%pI4:%u/%u->%pI4:%u/%u\n",
 281                                      qp->wq.sq.qid, qp->wq.rq.qid,
 282                                      (int)qp->attr.state,
 283                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 284                                      ep->hwtid, (int)ep->com.state,
 285                                      &lsin->sin_addr, ntohs(lsin->sin_port),
 286                                      ntohs(m_lsin->sin_port),
 287                                      &rsin->sin_addr, ntohs(rsin->sin_port),
 288                                      ntohs(m_rsin->sin_port));
 289                } else {
 290                        struct sockaddr_in6 *lsin6;
 291                        struct sockaddr_in6 *rsin6;
 292                        struct sockaddr_in6 *m_lsin6;
 293                        struct sockaddr_in6 *m_rsin6;
 294
 295                        set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6,
 296                                          &m_rsin6);
 297                        cc = snprintf(qpd->buf + qpd->pos, space,
 298                                      "rc qp sq id %u rq id %u state %u "
 299                                      "onchip %u ep tid %u state %u "
 300                                      "%pI6:%u/%u->%pI6:%u/%u\n",
 301                                      qp->wq.sq.qid, qp->wq.rq.qid,
 302                                      (int)qp->attr.state,
 303                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 304                                      ep->hwtid, (int)ep->com.state,
 305                                      &lsin6->sin6_addr,
 306                                      ntohs(lsin6->sin6_port),
 307                                      ntohs(m_lsin6->sin6_port),
 308                                      &rsin6->sin6_addr,
 309                                      ntohs(rsin6->sin6_port),
 310                                      ntohs(m_rsin6->sin6_port));
 311                }
 312        } else
 313                cc = snprintf(qpd->buf + qpd->pos, space,
 314                             "qp sq id %u rq id %u state %u onchip %u\n",
 315                              qp->wq.sq.qid, qp->wq.rq.qid,
 316                              (int)qp->attr.state,
 317                              qp->wq.sq.flags & T4_SQ_ONCHIP);
 318        if (cc < space)
 319                qpd->pos += cc;
 320        return 0;
 321}
 322
 323static int qp_release(struct inode *inode, struct file *file)
 324{
 325        struct c4iw_debugfs_data *qpd = file->private_data;
 326        if (!qpd) {
 327                pr_info("%s null qpd?\n", __func__);
 328                return 0;
 329        }
 330        vfree(qpd->buf);
 331        kfree(qpd);
 332        return 0;
 333}
 334
 335static int qp_open(struct inode *inode, struct file *file)
 336{
 337        struct c4iw_debugfs_data *qpd;
 338        int count = 1;
 339
 340        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
 341        if (!qpd)
 342                return -ENOMEM;
 343
 344        qpd->devp = inode->i_private;
 345        qpd->pos = 0;
 346
 347        spin_lock_irq(&qpd->devp->lock);
 348        idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
 349        spin_unlock_irq(&qpd->devp->lock);
 350
 351        qpd->bufsize = count * 180;
 352        qpd->buf = vmalloc(qpd->bufsize);
 353        if (!qpd->buf) {
 354                kfree(qpd);
 355                return -ENOMEM;
 356        }
 357
 358        spin_lock_irq(&qpd->devp->lock);
 359        idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
 360        spin_unlock_irq(&qpd->devp->lock);
 361
 362        qpd->buf[qpd->pos++] = 0;
 363        file->private_data = qpd;
 364        return 0;
 365}
 366
 367static const struct file_operations qp_debugfs_fops = {
 368        .owner   = THIS_MODULE,
 369        .open    = qp_open,
 370        .release = qp_release,
 371        .read    = debugfs_read,
 372        .llseek  = default_llseek,
 373};
 374
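     /*
      * idr_for_each() callback behind the "stags" debugfs file: read the
      * TPT entry for this stag index from adapter memory with
      * cxgb4_read_tpte() and decode its validity, key, state, PD,
      * permissions, page size, length and VA into the output buffer.
      */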
 375static int dump_stag(int id, void *p, void *data)
 376{
 377        struct c4iw_debugfs_data *stagd = data;
 378        int space;
 379        int cc;
 380        struct fw_ri_tpte tpte;
 381        int ret;
 382
 383        space = stagd->bufsize - stagd->pos - 1;
 384        if (space == 0)
 385                return 1;
 386
 387        ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8,
 388                              (__be32 *)&tpte);
 389        if (ret) {
 390                dev_err(&stagd->devp->rdev.lldi.pdev->dev,
 391                        "%s cxgb4_read_tpte err %d\n", __func__, ret);
 392                return ret;
 393        }
 394        cc = snprintf(stagd->buf + stagd->pos, space,
 395                      "stag: idx 0x%x valid %d key 0x%x state %d pdid %d "
 396                      "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
 397                      (u32)id<<8,
 398                      FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
 399                      FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
 400                      FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
 401                      FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
 402                      FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
 403                      FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
 404                      ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
 405                      ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
 406        if (cc < space)
 407                stagd->pos += cc;
 408        return 0;
 409}
 410
 411static int stag_release(struct inode *inode, struct file *file)
 412{
 413        struct c4iw_debugfs_data *stagd = file->private_data;
 414        if (!stagd) {
 415                pr_info("%s null stagd?\n", __func__);
 416                return 0;
 417        }
 418        vfree(stagd->buf);
 419        kfree(stagd);
 420        return 0;
 421}
 422
 423static int stag_open(struct inode *inode, struct file *file)
 424{
 425        struct c4iw_debugfs_data *stagd;
 426        int ret = 0;
 427        int count = 1;
 428
 429        stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
 430        if (!stagd) {
 431                ret = -ENOMEM;
 432                goto out;
 433        }
 434        stagd->devp = inode->i_private;
 435        stagd->pos = 0;
 436
 437        spin_lock_irq(&stagd->devp->lock);
 438        idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
 439        spin_unlock_irq(&stagd->devp->lock);
 440
 441        stagd->bufsize = count * 256;
 442        stagd->buf = vmalloc(stagd->bufsize);
 443        if (!stagd->buf) {
 444                ret = -ENOMEM;
 445                goto err1;
 446        }
 447
 448        spin_lock_irq(&stagd->devp->lock);
 449        idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
 450        spin_unlock_irq(&stagd->devp->lock);
 451
 452        stagd->buf[stagd->pos++] = 0;
 453        file->private_data = stagd;
 454        goto out;
 455err1:
 456        kfree(stagd);
 457out:
 458        return ret;
 459}
 460
 461static const struct file_operations stag_debugfs_fops = {
 462        .owner   = THIS_MODULE,
 463        .open    = stag_open,
 464        .release = stag_release,
 465        .read    = debugfs_read,
 466        .llseek  = default_llseek,
 467};
 468
 469static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
 470
 471static int stats_show(struct seq_file *seq, void *v)
 472{
 473        struct c4iw_dev *dev = seq->private;
 474
 475        seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
 476                   "Max", "Fail");
 477        seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
 478                        dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
 479                        dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
 480        seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 481                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 482                        dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
 483        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 484                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 485                        dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
 486        seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
 487                        dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
 488                        dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
 489        seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
 490                        dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
 491                        dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
 492        seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
 493                        dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
 494                        dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
 495        seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
 496        seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
 497        seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
 498        seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
 499                   db_state_str[dev->db_state],
 500                   dev->rdev.stats.db_state_transitions,
 501                   dev->rdev.stats.db_fc_interruptions);
 502        seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
 503        seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
 504                   dev->rdev.stats.act_ofld_conn_fails);
 505        seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
 506                   dev->rdev.stats.pas_ofld_conn_fails);
 507        seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
 508        seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
 509        return 0;
 510}
 511
 512static int stats_open(struct inode *inode, struct file *file)
 513{
 514        return single_open(file, stats_show, inode->i_private);
 515}
 516
 517static ssize_t stats_clear(struct file *file, const char __user *buf,
 518                size_t count, loff_t *pos)
 519{
 520        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 521
 522        mutex_lock(&dev->rdev.stats.lock);
 523        dev->rdev.stats.pd.max = 0;
 524        dev->rdev.stats.pd.fail = 0;
 525        dev->rdev.stats.qid.max = 0;
 526        dev->rdev.stats.qid.fail = 0;
 527        dev->rdev.stats.stag.max = 0;
 528        dev->rdev.stats.stag.fail = 0;
 529        dev->rdev.stats.pbl.max = 0;
 530        dev->rdev.stats.pbl.fail = 0;
 531        dev->rdev.stats.rqt.max = 0;
 532        dev->rdev.stats.rqt.fail = 0;
 533        dev->rdev.stats.ocqp.max = 0;
 534        dev->rdev.stats.ocqp.fail = 0;
 535        dev->rdev.stats.db_full = 0;
 536        dev->rdev.stats.db_empty = 0;
 537        dev->rdev.stats.db_drop = 0;
 538        dev->rdev.stats.db_state_transitions = 0;
 539        dev->rdev.stats.tcam_full = 0;
 540        dev->rdev.stats.act_ofld_conn_fails = 0;
 541        dev->rdev.stats.pas_ofld_conn_fails = 0;
 542        mutex_unlock(&dev->rdev.stats.lock);
 543        return count;
 544}
 545
 546static const struct file_operations stats_debugfs_fops = {
 547        .owner   = THIS_MODULE,
 548        .open    = stats_open,
 549        .release = single_release,
 550        .read    = seq_read,
 551        .llseek  = seq_lseek,
 552        .write   = stats_clear,
 553};
 554
 555static int dump_ep(int id, void *p, void *data)
 556{
 557        struct c4iw_ep *ep = p;
 558        struct c4iw_debugfs_data *epd = data;
 559        int space;
 560        int cc;
 561
 562        space = epd->bufsize - epd->pos - 1;
 563        if (space == 0)
 564                return 1;
 565
 566        if (ep->com.local_addr.ss_family == AF_INET) {
 567                struct sockaddr_in *lsin;
 568                struct sockaddr_in *rsin;
 569                struct sockaddr_in *m_lsin;
 570                struct sockaddr_in *m_rsin;
 571
 572                set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
 573                cc = snprintf(epd->buf + epd->pos, space,
 574                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 575                              "history 0x%lx hwtid %d atid %d "
 576                              "conn_na %u abort_na %u "
 577                              "%pI4:%d/%d <-> %pI4:%d/%d\n",
 578                              ep, ep->com.cm_id, ep->com.qp,
 579                              (int)ep->com.state, ep->com.flags,
 580                              ep->com.history, ep->hwtid, ep->atid,
 581                              ep->stats.connect_neg_adv,
 582                              ep->stats.abort_neg_adv,
 583                              &lsin->sin_addr, ntohs(lsin->sin_port),
 584                              ntohs(m_lsin->sin_port),
 585                              &rsin->sin_addr, ntohs(rsin->sin_port),
 586                              ntohs(m_rsin->sin_port));
 587        } else {
 588                struct sockaddr_in6 *lsin6;
 589                struct sockaddr_in6 *rsin6;
 590                struct sockaddr_in6 *m_lsin6;
 591                struct sockaddr_in6 *m_rsin6;
 592
 593                set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, &m_rsin6);
 594                cc = snprintf(epd->buf + epd->pos, space,
 595                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 596                              "history 0x%lx hwtid %d atid %d "
 597                              "conn_na %u abort_na %u "
 598                              "%pI6:%d/%d <-> %pI6:%d/%d\n",
 599                              ep, ep->com.cm_id, ep->com.qp,
 600                              (int)ep->com.state, ep->com.flags,
 601                              ep->com.history, ep->hwtid, ep->atid,
 602                              ep->stats.connect_neg_adv,
 603                              ep->stats.abort_neg_adv,
 604                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 605                              ntohs(m_lsin6->sin6_port),
 606                              &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
 607                              ntohs(m_rsin6->sin6_port));
 608        }
 609        if (cc < space)
 610                epd->pos += cc;
 611        return 0;
 612}
 613
 614static int dump_listen_ep(int id, void *p, void *data)
 615{
 616        struct c4iw_listen_ep *ep = p;
 617        struct c4iw_debugfs_data *epd = data;
 618        int space;
 619        int cc;
 620
 621        space = epd->bufsize - epd->pos - 1;
 622        if (space == 0)
 623                return 1;
 624
 625        if (ep->com.local_addr.ss_family == AF_INET) {
 626                struct sockaddr_in *lsin = (struct sockaddr_in *)
 627                        &ep->com.cm_id->local_addr;
 628                struct sockaddr_in *m_lsin = (struct sockaddr_in *)
 629                        &ep->com.cm_id->m_local_addr;
 630
 631                cc = snprintf(epd->buf + epd->pos, space,
 632                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 633                              "backlog %d %pI4:%d/%d\n",
 634                              ep, ep->com.cm_id, (int)ep->com.state,
 635                              ep->com.flags, ep->stid, ep->backlog,
 636                              &lsin->sin_addr, ntohs(lsin->sin_port),
 637                              ntohs(m_lsin->sin_port));
 638        } else {
 639                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 640                        &ep->com.cm_id->local_addr;
 641                struct sockaddr_in6 *m_lsin6 = (struct sockaddr_in6 *)
 642                        &ep->com.cm_id->m_local_addr;
 643
 644                cc = snprintf(epd->buf + epd->pos, space,
 645                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 646                              "backlog %d %pI6:%d/%d\n",
 647                              ep, ep->com.cm_id, (int)ep->com.state,
 648                              ep->com.flags, ep->stid, ep->backlog,
 649                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 650                              ntohs(m_lsin6->sin6_port));
 651        }
 652        if (cc < space)
 653                epd->pos += cc;
 654        return 0;
 655}
 656
 657static int ep_release(struct inode *inode, struct file *file)
 658{
 659        struct c4iw_debugfs_data *epd = file->private_data;
 660        if (!epd) {
  661                pr_info("%s null epd?\n", __func__);
 662                return 0;
 663        }
 664        vfree(epd->buf);
 665        kfree(epd);
 666        return 0;
 667}
 668
 669static int ep_open(struct inode *inode, struct file *file)
 670{
 671        struct c4iw_debugfs_data *epd;
 672        int ret = 0;
 673        int count = 1;
 674
 675        epd = kmalloc(sizeof(*epd), GFP_KERNEL);
 676        if (!epd) {
 677                ret = -ENOMEM;
 678                goto out;
 679        }
 680        epd->devp = inode->i_private;
 681        epd->pos = 0;
 682
 683        spin_lock_irq(&epd->devp->lock);
 684        idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
 685        idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
 686        idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 687        spin_unlock_irq(&epd->devp->lock);
 688
 689        epd->bufsize = count * 240;
 690        epd->buf = vmalloc(epd->bufsize);
 691        if (!epd->buf) {
 692                ret = -ENOMEM;
 693                goto err1;
 694        }
 695
 696        spin_lock_irq(&epd->devp->lock);
 697        idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
 698        idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
 699        idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
 700        spin_unlock_irq(&epd->devp->lock);
 701
 702        file->private_data = epd;
 703        goto out;
 704err1:
 705        kfree(epd);
 706out:
 707        return ret;
 708}
 709
 710static const struct file_operations ep_debugfs_fops = {
 711        .owner   = THIS_MODULE,
 712        .open    = ep_open,
 713        .release = ep_release,
 714        .read    = debugfs_read,
 715};
 716
 717static int setup_debugfs(struct c4iw_dev *devp)
 718{
 719        if (!devp->debugfs_root)
 720                return -1;
 721
 722        debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
 723                                 (void *)devp, &qp_debugfs_fops, 4096);
 724
 725        debugfs_create_file_size("stags", S_IWUSR, devp->debugfs_root,
 726                                 (void *)devp, &stag_debugfs_fops, 4096);
 727
 728        debugfs_create_file_size("stats", S_IWUSR, devp->debugfs_root,
 729                                 (void *)devp, &stats_debugfs_fops, 4096);
 730
 731        debugfs_create_file_size("eps", S_IWUSR, devp->debugfs_root,
 732                                 (void *)devp, &ep_debugfs_fops, 4096);
 733
 734        if (c4iw_wr_log)
 735                debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
 736                                         (void *)devp, &wr_log_debugfs_fops, 4096);
 737        return 0;
 738}
 739
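     /*
      * Free the cached qpid/cqid lists of a device ucontext.  A qpid whose
      * low qpmask bits are clear is the base of an allocated qid block, so
      * it is returned to the qid resource table and the usage counter is
      * reduced by the block size; all other entries are simply freed.
      */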
 740void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 741                               struct c4iw_dev_ucontext *uctx)
 742{
 743        struct list_head *pos, *nxt;
 744        struct c4iw_qid_list *entry;
 745
 746        mutex_lock(&uctx->lock);
 747        list_for_each_safe(pos, nxt, &uctx->qpids) {
 748                entry = list_entry(pos, struct c4iw_qid_list, entry);
 749                list_del_init(&entry->entry);
 750                if (!(entry->qid & rdev->qpmask)) {
 751                        c4iw_put_resource(&rdev->resource.qid_table,
 752                                          entry->qid);
 753                        mutex_lock(&rdev->stats.lock);
 754                        rdev->stats.qid.cur -= rdev->qpmask + 1;
 755                        mutex_unlock(&rdev->stats.lock);
 756                }
 757                kfree(entry);
 758        }
 759
 760        list_for_each_safe(pos, nxt, &uctx->cqids) {
 761                entry = list_entry(pos, struct c4iw_qid_list, entry);
 762                list_del_init(&entry->entry);
 763                kfree(entry);
 764        }
 765        mutex_unlock(&uctx->lock);
 766}
 767
 768void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 769                            struct c4iw_dev_ucontext *uctx)
 770{
 771        INIT_LIST_HEAD(&uctx->qpids);
 772        INIT_LIST_HEAD(&uctx->cqids);
 773        mutex_init(&uctx->lock);
 774}
 775
 776/* Caller takes care of locking if needed */
 777static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 778{
 779        int err;
 780
 781        c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 782
 783        /*
 784         * This implementation assumes udb_density == ucq_density!  Eventually
 785         * we might need to support this but for now fail the open. Also the
 786         * cqid and qpid range must match for now.
 787         */
 788        if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
 789                pr_err("%s: unsupported udb/ucq densities %u/%u\n",
 790                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
 791                       rdev->lldi.ucq_density);
 792                return -EINVAL;
 793        }
 794        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
 795            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
 796                pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n",
 797                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
  798                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
 799                       rdev->lldi.vr->cq.size);
 800                return -EINVAL;
 801        }
 802
 803        rdev->qpmask = rdev->lldi.udb_density - 1;
 804        rdev->cqmask = rdev->lldi.ucq_density - 1;
 805        pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
 806                 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 807                 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 808                 rdev->lldi.vr->pbl.start,
 809                 rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
 810                 rdev->lldi.vr->rq.size,
 811                 rdev->lldi.vr->qp.start,
 812                 rdev->lldi.vr->qp.size,
 813                 rdev->lldi.vr->cq.start,
 814                 rdev->lldi.vr->cq.size);
 815        pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
 816                 &rdev->lldi.pdev->resource[2],
 817                 rdev->lldi.db_reg, rdev->lldi.gts_reg,
 818                 rdev->qpmask, rdev->cqmask);
 819
 820        if (c4iw_num_stags(rdev) == 0)
 821                return -EINVAL;
 822
 823        rdev->stats.pd.total = T4_MAX_NUM_PD;
 824        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 825        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 826        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
 827        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 828        rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 829
 830        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
 831        if (err) {
 832                pr_err("error %d initializing resources\n", err);
 833                return err;
 834        }
 835        err = c4iw_pblpool_create(rdev);
 836        if (err) {
 837                pr_err("error %d initializing pbl pool\n", err);
 838                goto destroy_resource;
 839        }
 840        err = c4iw_rqtpool_create(rdev);
 841        if (err) {
 842                pr_err("error %d initializing rqt pool\n", err);
 843                goto destroy_pblpool;
 844        }
 845        err = c4iw_ocqp_pool_create(rdev);
 846        if (err) {
 847                pr_err("error %d initializing ocqp pool\n", err);
 848                goto destroy_rqtpool;
 849        }
 850        rdev->status_page = (struct t4_dev_status_page *)
 851                            __get_free_page(GFP_KERNEL);
 852        if (!rdev->status_page) {
 853                err = -ENOMEM;
 854                goto destroy_ocqp_pool;
 855        }
 856        rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
 857        rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
 858        rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
 859        rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
 860
 861        if (c4iw_wr_log) {
 862                rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
 863                                       sizeof(*rdev->wr_log), GFP_KERNEL);
 864                if (rdev->wr_log) {
 865                        rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
 866                        atomic_set(&rdev->wr_log_idx, 0);
 867                }
 868        }
 869
 870        rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
 871        if (!rdev->free_workq) {
 872                err = -ENOMEM;
 873                goto err_free_status_page_and_wr_log;
 874        }
 875
 876        rdev->status_page->db_off = 0;
 877
 878        return 0;
 879err_free_status_page_and_wr_log:
 880        if (c4iw_wr_log && rdev->wr_log)
 881                kfree(rdev->wr_log);
 882        free_page((unsigned long)rdev->status_page);
 883destroy_ocqp_pool:
 884        c4iw_ocqp_pool_destroy(rdev);
 885destroy_rqtpool:
 886        c4iw_rqtpool_destroy(rdev);
 887destroy_pblpool:
 888        c4iw_pblpool_destroy(rdev);
 889destroy_resource:
 890        c4iw_destroy_resource(&rdev->resource);
 891        return err;
 892}
 893
 894static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 895{
 896        destroy_workqueue(rdev->free_workq);
 897        kfree(rdev->wr_log);
 898        c4iw_release_dev_ucontext(rdev, &rdev->uctx);
 899        free_page((unsigned long)rdev->status_page);
 900        c4iw_pblpool_destroy(rdev);
 901        c4iw_rqtpool_destroy(rdev);
 902        c4iw_ocqp_pool_destroy(rdev);
 903        c4iw_destroy_resource(&rdev->resource);
 904}
 905
 906void c4iw_dealloc(struct uld_ctx *ctx)
 907{
 908        c4iw_rdev_close(&ctx->dev->rdev);
 909        WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
 910        idr_destroy(&ctx->dev->cqidr);
 911        WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
 912        idr_destroy(&ctx->dev->qpidr);
 913        WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
 914        idr_destroy(&ctx->dev->mmidr);
 915        wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
 916        idr_destroy(&ctx->dev->hwtid_idr);
 917        idr_destroy(&ctx->dev->stid_idr);
 918        idr_destroy(&ctx->dev->atid_idr);
 919        if (ctx->dev->rdev.bar2_kva)
 920                iounmap(ctx->dev->rdev.bar2_kva);
 921        if (ctx->dev->rdev.oc_mw_kva)
 922                iounmap(ctx->dev->rdev.oc_mw_kva);
 923        ib_dealloc_device(&ctx->dev->ibdev);
 924        ctx->dev = NULL;
 925}
 926
 927static void c4iw_remove(struct uld_ctx *ctx)
 928{
 929        pr_debug("c4iw_dev %p\n", ctx->dev);
 930        c4iw_unregister_device(ctx->dev);
 931        c4iw_dealloc(ctx);
 932}
 933
 934static int rdma_supported(const struct cxgb4_lld_info *infop)
 935{
 936        return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
 937               infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
 938               infop->vr->cq.size > 0;
 939}
 940
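     /*
      * Allocate and initialize a c4iw_dev for a newly probed adapter: copy
      * the lld info, derive the hardware queue limits from the SGE
      * parameters, map BAR2 write-combined (or just the on-chip queue
      * window on T4), open the rdev resources, and set up the idr tables,
      * locks and debugfs entries.
      */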
 941static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 942{
 943        struct c4iw_dev *devp;
 944        int ret;
 945
 946        if (!rdma_supported(infop)) {
 947                pr_info("%s: RDMA not supported on this device\n",
 948                        pci_name(infop->pdev));
 949                return ERR_PTR(-ENOSYS);
 950        }
 951        if (!ocqp_supported(infop))
 952                pr_info("%s: On-Chip Queues not supported on this device\n",
 953                        pci_name(infop->pdev));
 954
 955        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 956        if (!devp) {
 957                pr_err("Cannot allocate ib device\n");
 958                return ERR_PTR(-ENOMEM);
 959        }
 960        devp->rdev.lldi = *infop;
 961
 962        /* init various hw-queue params based on lld info */
 963        pr_debug("Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
 964                 devp->rdev.lldi.sge_ingpadboundary,
 965                 devp->rdev.lldi.sge_egrstatuspagesize);
 966
 967        devp->rdev.hw_queue.t4_eq_status_entries =
 968                devp->rdev.lldi.sge_egrstatuspagesize / 64;
 969        devp->rdev.hw_queue.t4_max_eq_size = 65520;
 970        devp->rdev.hw_queue.t4_max_iq_size = 65520;
 971        devp->rdev.hw_queue.t4_max_rq_size = 8192 -
 972                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 973        devp->rdev.hw_queue.t4_max_sq_size =
 974                devp->rdev.hw_queue.t4_max_eq_size -
 975                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 976        devp->rdev.hw_queue.t4_max_qp_depth =
 977                devp->rdev.hw_queue.t4_max_rq_size;
 978        devp->rdev.hw_queue.t4_max_cq_depth =
 979                devp->rdev.hw_queue.t4_max_iq_size - 2;
 980        devp->rdev.hw_queue.t4_stat_len =
 981                devp->rdev.lldi.sge_egrstatuspagesize;
 982
 983        /*
 984         * For T5/T6 devices, we map all of BAR2 with WC.
 985         * For T4 devices with onchip qp mem, we map only that part
 986         * of BAR2 with WC.
 987         */
 988        devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
 989        if (!is_t4(devp->rdev.lldi.adapter_type)) {
 990                devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
 991                        pci_resource_len(devp->rdev.lldi.pdev, 2));
 992                if (!devp->rdev.bar2_kva) {
 993                        pr_err("Unable to ioremap BAR2\n");
 994                        ib_dealloc_device(&devp->ibdev);
 995                        return ERR_PTR(-EINVAL);
 996                }
 997        } else if (ocqp_supported(infop)) {
 998                devp->rdev.oc_mw_pa =
 999                        pci_resource_start(devp->rdev.lldi.pdev, 2) +
1000                        pci_resource_len(devp->rdev.lldi.pdev, 2) -
1001                        roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
1002                devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
1003                        devp->rdev.lldi.vr->ocq.size);
1004                if (!devp->rdev.oc_mw_kva) {
1005                        pr_err("Unable to ioremap onchip mem\n");
1006                        ib_dealloc_device(&devp->ibdev);
1007                        return ERR_PTR(-EINVAL);
1008                }
1009        }
1010
1011        pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
1012                 devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
1013                 devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
1014
1015        ret = c4iw_rdev_open(&devp->rdev);
1016        if (ret) {
1017                pr_err("Unable to open CXIO rdev err %d\n", ret);
1018                ib_dealloc_device(&devp->ibdev);
1019                return ERR_PTR(ret);
1020        }
1021
1022        idr_init(&devp->cqidr);
1023        idr_init(&devp->qpidr);
1024        idr_init(&devp->mmidr);
1025        idr_init(&devp->hwtid_idr);
1026        idr_init(&devp->stid_idr);
1027        idr_init(&devp->atid_idr);
1028        spin_lock_init(&devp->lock);
1029        mutex_init(&devp->rdev.stats.lock);
1030        mutex_init(&devp->db_mutex);
1031        INIT_LIST_HEAD(&devp->db_fc_list);
1032        init_waitqueue_head(&devp->wait);
1033        devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
1034
1035        if (c4iw_debugfs_root) {
1036                devp->debugfs_root = debugfs_create_dir(
1037                                        pci_name(devp->rdev.lldi.pdev),
1038                                        c4iw_debugfs_root);
1039                setup_debugfs(devp);
1040        }
1041
1042
1043        return devp;
1044}
1045
1046static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
1047{
1048        struct uld_ctx *ctx;
1049        static int vers_printed;
1050        int i;
1051
1052        if (!vers_printed++)
1053                pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
1054                        DRV_VERSION);
1055
1056        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
1057        if (!ctx) {
1058                ctx = ERR_PTR(-ENOMEM);
1059                goto out;
1060        }
1061        ctx->lldi = *infop;
1062
1063        pr_debug("found device %s nchan %u nrxq %u ntxq %u nports %u\n",
1064                 pci_name(ctx->lldi.pdev),
1065                 ctx->lldi.nchan, ctx->lldi.nrxq,
1066                 ctx->lldi.ntxq, ctx->lldi.nports);
1067
1068        mutex_lock(&dev_mutex);
1069        list_add_tail(&ctx->entry, &uld_ctx_list);
1070        mutex_unlock(&dev_mutex);
1071
1072        for (i = 0; i < ctx->lldi.nrxq; i++)
1073                pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
1074out:
1075        return ctx;
1076}
1077
1078static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
1079                                                 const __be64 *rsp,
1080                                                 u32 pktshift)
1081{
1082        struct sk_buff *skb;
1083
1084        /*
1085         * Allocate space for cpl_pass_accept_req which will be synthesized by
 1086         * the driver. Once the driver synthesizes the request, the skb will go
1087         * through the regular cpl_pass_accept_req processing.
1088         * The math here assumes sizeof cpl_pass_accept_req >= sizeof
1089         * cpl_rx_pkt.
1090         */
1091        skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1092                        sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
1093        if (unlikely(!skb))
1094                return NULL;
1095
1096        __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1097                  sizeof(struct rss_header) - pktshift);
1098
1099        /*
1100         * This skb will contain:
1101         *   rss_header from the rspq descriptor (1 flit)
1102         *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
1103         *   space for the difference between the size of an
1104         *      rx_pkt and pass_accept_req cpl (1 flit)
1105         *   the packet data from the gl
1106         */
1107        skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
1108                                sizeof(struct rss_header));
1109        skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
1110                                       sizeof(struct cpl_pass_accept_req),
1111                                       gl->va + pktshift,
1112                                       gl->tot_len - pktshift);
1113        return skb;
1114}
1115
1116static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
1117                           const __be64 *rsp)
1118{
1119        unsigned int opcode = *(u8 *)rsp;
1120        struct sk_buff *skb;
1121
1122        if (opcode != CPL_RX_PKT)
1123                goto out;
1124
 1125        skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
1126        if (skb == NULL)
1127                goto out;
1128
1129        if (c4iw_handlers[opcode] == NULL) {
1130                pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
1131                kfree_skb(skb);
1132                goto out;
1133        }
1134        c4iw_handlers[opcode](dev, skb);
1135        return 1;
1136out:
1137        return 0;
1138}
1139
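     /*
      * ULD ingress handler.  Four cases:
      *  - gl == NULL: the CPL fits in the response descriptor, so copy it
      *    out of the descriptor into a small skb;
      *  - gl == CXGB4_MSG_AN: an async notification carrying a CQ qid,
      *    forwarded to c4iw_ev_handler();
      *  - the opcode in the descriptor differs from the one in the
      *    free-list buffer: let recv_rx_pkt() synthesize a pass_accept_req
      *    from the CPL_RX_PKT, or log and drop anything else;
      *  - otherwise build an skb from the gather list and dispatch the CPL
      *    through c4iw_handlers[].
      */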
1140static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
1141                        const struct pkt_gl *gl)
1142{
1143        struct uld_ctx *ctx = handle;
1144        struct c4iw_dev *dev = ctx->dev;
1145        struct sk_buff *skb;
1146        u8 opcode;
1147
1148        if (gl == NULL) {
1149                /* omit RSS and rsp_ctrl at end of descriptor */
1150                unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
1151
1152                skb = alloc_skb(256, GFP_ATOMIC);
1153                if (!skb)
1154                        goto nomem;
1155                __skb_put(skb, len);
1156                skb_copy_to_linear_data(skb, &rsp[1], len);
1157        } else if (gl == CXGB4_MSG_AN) {
1158                const struct rsp_ctrl *rc = (void *)rsp;
1159
1160                u32 qid = be32_to_cpu(rc->pldbuflen_qid);
1161                c4iw_ev_handler(dev, qid);
1162                return 0;
1163        } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
1164                if (recv_rx_pkt(dev, gl, rsp))
1165                        return 0;
1166
1167                pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n",
1168                        pci_name(ctx->lldi.pdev), gl->va,
1169                        be64_to_cpu(*rsp),
1170                        be64_to_cpu(*(__force __be64 *)gl->va),
1171                        gl->tot_len);
1172
1173                return 0;
1174        } else {
1175                skb = cxgb4_pktgl_to_skb(gl, 128, 128);
1176                if (unlikely(!skb))
1177                        goto nomem;
1178        }
1179
1180        opcode = *(u8 *)rsp;
1181        if (c4iw_handlers[opcode]) {
1182                c4iw_handlers[opcode](dev, skb);
1183        } else {
1184                pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
1185                kfree_skb(skb);
1186        }
1187
1188        return 0;
1189nomem:
1190        return -1;
1191}
1192
1193static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
1194{
1195        struct uld_ctx *ctx = handle;
1196
1197        pr_debug("new_state %u\n", new_state);
1198        switch (new_state) {
1199        case CXGB4_STATE_UP:
1200                pr_info("%s: Up\n", pci_name(ctx->lldi.pdev));
1201                if (!ctx->dev) {
1202                        ctx->dev = c4iw_alloc(&ctx->lldi);
1203                        if (IS_ERR(ctx->dev)) {
1204                                pr_err("%s: initialization failed: %ld\n",
1205                                       pci_name(ctx->lldi.pdev),
1206                                       PTR_ERR(ctx->dev));
1207                                ctx->dev = NULL;
1208                                break;
1209                        }
1210
1211                        INIT_WORK(&ctx->reg_work, c4iw_register_device);
1212                        queue_work(reg_workq, &ctx->reg_work);
1213                }
1214                break;
1215        case CXGB4_STATE_DOWN:
1216                pr_info("%s: Down\n", pci_name(ctx->lldi.pdev));
1217                if (ctx->dev)
1218                        c4iw_remove(ctx);
1219                break;
1220        case CXGB4_STATE_START_RECOVERY:
1221                pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
1222                if (ctx->dev) {
1223                        struct ib_event event;
1224
1225                        ctx->dev->rdev.flags |= T4_FATAL_ERROR;
1226                        memset(&event, 0, sizeof event);
1227                        event.event  = IB_EVENT_DEVICE_FATAL;
1228                        event.device = &ctx->dev->ibdev;
1229                        ib_dispatch_event(&event);
1230                        c4iw_remove(ctx);
1231                }
1232                break;
1233        case CXGB4_STATE_DETACH:
1234                pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev));
1235                if (ctx->dev)
1236                        c4iw_remove(ctx);
1237                break;
1238        }
1239        return 0;
1240}
1241
1242static int disable_qp_db(int id, void *p, void *data)
1243{
1244        struct c4iw_qp *qp = p;
1245
1246        t4_disable_wq_db(&qp->wq);
1247        return 0;
1248}
1249
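     /*
      * CXGB4_CONTROL_DB_FULL: the doorbell FIFO is full.  Move the device
      * to the STOPPED state and turn doorbells off, either per-QP (when the
      * shared status page is disabled) or via the status page db_off flag.
      */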
1250static void stop_queues(struct uld_ctx *ctx)
1251{
1252        unsigned long flags;
1253
1254        spin_lock_irqsave(&ctx->dev->lock, flags);
1255        ctx->dev->rdev.stats.db_state_transitions++;
1256        ctx->dev->db_state = STOPPED;
1257        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
1258                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
1259        else
1260                ctx->dev->rdev.status_page->db_off = 1;
1261        spin_unlock_irqrestore(&ctx->dev->lock, flags);
1262}
1263
1264static int enable_qp_db(int id, void *p, void *data)
1265{
1266        struct c4iw_qp *qp = p;
1267
1268        t4_enable_wq_db(&qp->wq);
1269        return 0;
1270}
1271
1272static void resume_rc_qp(struct c4iw_qp *qp)
1273{
1274        spin_lock(&qp->lock);
1275        t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
1276        qp->wq.sq.wq_pidx_inc = 0;
1277        t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
1278        qp->wq.rq.wq_pidx_inc = 0;
1279        spin_unlock(&qp->lock);
1280}
1281
1282static void resume_a_chunk(struct uld_ctx *ctx)
1283{
1284        int i;
1285        struct c4iw_qp *qp;
1286
1287        for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
1288                qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
1289                                      db_fc_entry);
1290                list_del_init(&qp->db_fc_entry);
1291                resume_rc_qp(qp);
1292                if (list_empty(&ctx->dev->db_fc_list))
1293                        break;
1294        }
1295}
1296
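     /*
      * CXGB4_CONTROL_DB_EMPTY: the doorbell FIFO has drained.  Enter
      * FLOW_CONTROL and drain db_fc_list a chunk (DB_FC_RESUME_SIZE QPs) at
      * a time whenever the FIFO is below the drain threshold, sleeping
      * briefly between passes; once the list is empty, return to NORMAL and
      * re-enable doorbells.
      */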
1297static void resume_queues(struct uld_ctx *ctx)
1298{
1299        spin_lock_irq(&ctx->dev->lock);
1300        if (ctx->dev->db_state != STOPPED)
1301                goto out;
1302        ctx->dev->db_state = FLOW_CONTROL;
1303        while (1) {
1304                if (list_empty(&ctx->dev->db_fc_list)) {
1305                        WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
1306                        ctx->dev->db_state = NORMAL;
1307                        ctx->dev->rdev.stats.db_state_transitions++;
1308                        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
1309                                idr_for_each(&ctx->dev->qpidr, enable_qp_db,
1310                                             NULL);
1311                        } else {
1312                                ctx->dev->rdev.status_page->db_off = 0;
1313                        }
1314                        break;
1315                } else {
1316                        if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
1317                            < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
1318                               DB_FC_DRAIN_THRESH)) {
1319                                resume_a_chunk(ctx);
1320                        }
1321                        if (!list_empty(&ctx->dev->db_fc_list)) {
1322                                spin_unlock_irq(&ctx->dev->lock);
1323                                if (DB_FC_RESUME_DELAY) {
1324                                        set_current_state(TASK_UNINTERRUPTIBLE);
1325                                        schedule_timeout(DB_FC_RESUME_DELAY);
1326                                }
1327                                spin_lock_irq(&ctx->dev->lock);
1328                                if (ctx->dev->db_state != FLOW_CONTROL)
1329                                        break;
1330                        }
1331                }
1332        }
1333out:
1334        if (ctx->dev->db_state != NORMAL)
1335                ctx->dev->rdev.stats.db_fc_interruptions++;
1336        spin_unlock_irq(&ctx->dev->lock);
1337}
1338
1339struct qp_list {
1340        unsigned idx;
1341        struct c4iw_qp **qps;
1342};
1343
1344static int add_and_ref_qp(int id, void *p, void *data)
1345{
1346        struct qp_list *qp_listp = data;
1347        struct c4iw_qp *qp = p;
1348
1349        c4iw_qp_add_ref(&qp->ibqp);
1350        qp_listp->qps[qp_listp->idx++] = qp;
1351        return 0;
1352}
1353
1354static int count_qps(int id, void *p, void *data)
1355{
1356        unsigned *countp = data;
1357        (*countp)++;
1358        return 0;
1359}
1360
1361static void deref_qps(struct qp_list *qp_list)
1362{
1363        int idx;
1364
1365        for (idx = 0; idx < qp_list->idx; idx++)
1366                c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
1367}
1368
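     /*
      * For each QP captured in qp_list, re-sync the hardware SQ and RQ
      * producer indices with the host copies via cxgb4_sync_txq_pidx(),
      * clear the pending pidx increments, and wait for the doorbell FIFO
      * to drain before moving on to the next QP.
      */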
1369static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
1370{
1371        int idx;
1372        int ret;
1373
1374        for (idx = 0; idx < qp_list->idx; idx++) {
1375                struct c4iw_qp *qp = qp_list->qps[idx];
1376
1377                spin_lock_irq(&qp->rhp->lock);
1378                spin_lock(&qp->lock);
1379                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1380                                          qp->wq.sq.qid,
1381                                          t4_sq_host_wq_pidx(&qp->wq),
1382                                          t4_sq_wq_size(&qp->wq));
1383                if (ret) {
1384                        pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
1385                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
1386                        spin_unlock(&qp->lock);
1387                        spin_unlock_irq(&qp->rhp->lock);
1388                        return;
1389                }
1390                qp->wq.sq.wq_pidx_inc = 0;
1391
1392                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1393                                          qp->wq.rq.qid,
1394                                          t4_rq_host_wq_pidx(&qp->wq),
1395                                          t4_rq_wq_size(&qp->wq));
1396
1397                if (ret) {
1398                        pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
1399                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
1400                        spin_unlock(&qp->lock);
1401                        spin_unlock_irq(&qp->rhp->lock);
1402                        return;
1403                }
1404                qp->wq.rq.wq_pidx_inc = 0;
1405                spin_unlock(&qp->lock);
1406                spin_unlock_irq(&qp->rhp->lock);
1407
1408                /* Wait for the dbfifo to drain */
1409                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
1410                        set_current_state(TASK_UNINTERRUPTIBLE);
1411                        schedule_timeout(usecs_to_jiffies(10));
1412                }
1413        }
1414}
1415
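     /*
      * CXGB4_CONTROL_DB_DROP: the hardware dropped doorbells.  Flush the
      * SGE egress queue cache, reference every QP, and resync the lost
      * doorbell state outside the device lock, leaving the device STOPPED
      * (a later DB_EMPTY event resumes the queues).
      */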
1416static void recover_queues(struct uld_ctx *ctx)
1417{
1418        int count = 0;
1419        struct qp_list qp_list;
1420        int ret;
1421
1422        /* slow everybody down */
1423        set_current_state(TASK_UNINTERRUPTIBLE);
1424        schedule_timeout(usecs_to_jiffies(1000));
1425
1426        /* flush the SGE contexts */
1427        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
1428        if (ret) {
1429                pr_err("%s: Fatal error - DB overflow recovery failed\n",
1430                       pci_name(ctx->lldi.pdev));
1431                return;
1432        }
1433
1434        /* Count active queues so we can build a list of queues to recover */
1435        spin_lock_irq(&ctx->dev->lock);
1436        WARN_ON(ctx->dev->db_state != STOPPED);
1437        ctx->dev->db_state = RECOVERY;
1438        idr_for_each(&ctx->dev->qpidr, count_qps, &count);
1439
1440        qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
1441        if (!qp_list.qps) {
1442                spin_unlock_irq(&ctx->dev->lock);
1443                return;
1444        }
1445        qp_list.idx = 0;
1446
1447        /* add and ref each qp so it doesn't get freed */
1448        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
1449
1450        spin_unlock_irq(&ctx->dev->lock);
1451
 1452        /* now traverse the list in a safe context to recover the db state */
1453        recover_lost_dbs(ctx, &qp_list);
1454
1455        /* we're almost done!  deref the qps and clean up */
1456        deref_qps(&qp_list);
1457        kfree(qp_list.qps);
1458
1459        spin_lock_irq(&ctx->dev->lock);
1460        WARN_ON(ctx->dev->db_state != RECOVERY);
1461        ctx->dev->db_state = STOPPED;
1462        spin_unlock_irq(&ctx->dev->lock);
1463}
1464
1465static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
1466{
1467        struct uld_ctx *ctx = handle;
1468
1469        switch (control) {
1470        case CXGB4_CONTROL_DB_FULL:
1471                stop_queues(ctx);
1472                ctx->dev->rdev.stats.db_full++;
1473                break;
1474        case CXGB4_CONTROL_DB_EMPTY:
1475                resume_queues(ctx);
1476                mutex_lock(&ctx->dev->rdev.stats.lock);
1477                ctx->dev->rdev.stats.db_empty++;
1478                mutex_unlock(&ctx->dev->rdev.stats.lock);
1479                break;
1480        case CXGB4_CONTROL_DB_DROP:
1481                recover_queues(ctx);
1482                mutex_lock(&ctx->dev->rdev.stats.lock);
1483                ctx->dev->rdev.stats.db_drop++;
1484                mutex_unlock(&ctx->dev->rdev.stats.lock);
1485                break;
1486        default:
1487                pr_warn("%s: unknown control cmd %u\n",
1488                        pci_name(ctx->lldi.pdev), control);
1489                break;
1490        }
1491        return 0;
1492}
1493
1494static struct cxgb4_uld_info c4iw_uld_info = {
1495        .name = DRV_NAME,
1496        .nrxq = MAX_ULD_QSETS,
1497        .ntxq = MAX_ULD_QSETS,
1498        .rxq_size = 511,
1499        .ciq = true,
1500        .lro = false,
1501        .add = c4iw_uld_add,
1502        .rx_handler = c4iw_uld_rx_handler,
1503        .state_change = c4iw_uld_state_change,
1504        .control = c4iw_uld_control,
1505};
1506
1507void _c4iw_free_wr_wait(struct kref *kref)
1508{
1509        struct c4iw_wr_wait *wr_waitp;
1510
1511        wr_waitp = container_of(kref, struct c4iw_wr_wait, kref);
1512        pr_debug("Free wr_wait %p\n", wr_waitp);
1513        kfree(wr_waitp);
1514}
1515
1516struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp)
1517{
1518        struct c4iw_wr_wait *wr_waitp;
1519
1520        wr_waitp = kzalloc(sizeof(*wr_waitp), gfp);
1521        if (wr_waitp) {
1522                kref_init(&wr_waitp->kref);
1523                pr_debug("wr_wait %p\n", wr_waitp);
1524        }
1525        return wr_waitp;
1526}
1527
1528static int __init c4iw_init_module(void)
1529{
1530        int err;
1531
1532        err = c4iw_cm_init();
1533        if (err)
1534                return err;
1535
1536        c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
1537        if (!c4iw_debugfs_root)
1538                pr_warn("could not create debugfs entry, continuing\n");
1539
1540        reg_workq = create_singlethread_workqueue("Register_iWARP_device");
1541        if (!reg_workq) {
1542                pr_err("Failed creating workqueue to register iwarp device\n");
1543                return -ENOMEM;
1544        }
1545
1546        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
1547
1548        return 0;
1549}
1550
1551static void __exit c4iw_exit_module(void)
1552{
1553        struct uld_ctx *ctx, *tmp;
1554
1555        mutex_lock(&dev_mutex);
1556        list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
1557                if (ctx->dev)
1558                        c4iw_remove(ctx);
1559                kfree(ctx);
1560        }
1561        mutex_unlock(&dev_mutex);
1562        flush_workqueue(reg_workq);
1563        destroy_workqueue(reg_workq);
1564        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
1565        c4iw_cm_term();
1566        debugfs_remove_recursive(c4iw_debugfs_root);
1567}
1568
1569module_init(c4iw_init_module);
1570module_exit(c4iw_exit_module);
1571