linux/drivers/infiniband/hw/cxgb4/device.c
   1/*
   2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/moduleparam.h>
  34#include <linux/debugfs.h>
  35#include <linux/vmalloc.h>
  36#include <linux/math64.h>
  37
  38#include <rdma/ib_verbs.h>
  39
  40#include "iw_cxgb4.h"
  41
  42#define DRV_VERSION "0.1"
  43
  44MODULE_AUTHOR("Steve Wise");
  45MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
  46MODULE_LICENSE("Dual BSD/GPL");
  47MODULE_VERSION(DRV_VERSION);
  48
  49static int allow_db_fc_on_t5;
  50module_param(allow_db_fc_on_t5, int, 0644);
  51MODULE_PARM_DESC(allow_db_fc_on_t5,
  52                 "Allow DB Flow Control on T5 (default = 0)");
  53
  54static int allow_db_coalescing_on_t5;
  55module_param(allow_db_coalescing_on_t5, int, 0644);
  56MODULE_PARM_DESC(allow_db_coalescing_on_t5,
  57                 "Allow DB Coalescing on T5 (default = 0)");
  58
  59int c4iw_wr_log = 0;
  60module_param(c4iw_wr_log, int, 0444);
  61MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
  62
  63static int c4iw_wr_log_size_order = 12;
  64module_param(c4iw_wr_log_size_order, int, 0444);
  65MODULE_PARM_DESC(c4iw_wr_log_size_order,
  66                 "Number of entries (log2) in the work request timing log.");
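     /*
      * Both logging knobs are 0444, so they are normally given at module
      * load time, e.g. (assuming the usual iw_cxgb4 module name):
      *
      *      modprobe iw_cxgb4 c4iw_wr_log=1 c4iw_wr_log_size_order=12
      *
      * The log is sized as a power of two (1 << c4iw_wr_log_size_order
      * entries) so the ring index can wrap with a simple mask.
      */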
  67
  68struct uld_ctx {
  69        struct list_head entry;
  70        struct cxgb4_lld_info lldi;
  71        struct c4iw_dev *dev;
  72};
  73
  74static LIST_HEAD(uld_ctx_list);
  75static DEFINE_MUTEX(dev_mutex);
  76
  77#define DB_FC_RESUME_SIZE 64
  78#define DB_FC_RESUME_DELAY 1
  79#define DB_FC_DRAIN_THRESH 0
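     /*
      * Doorbell flow-control tuning used by resume_queues(): at most
      * DB_FC_RESUME_SIZE QPs are resumed per pass, the thread sleeps
      * DB_FC_RESUME_DELAY jiffies between passes, and QPs are only resumed
      * while the doorbell FIFO occupancy is below
      * dbfifo_int_thresh << DB_FC_DRAIN_THRESH.
      */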
  80
  81static struct dentry *c4iw_debugfs_root;
  82
  83struct c4iw_debugfs_data {
  84        struct c4iw_dev *devp;
  85        char *buf;
  86        int bufsize;
  87        int pos;
  88};
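     /*
      * Shared state for the qp/stag/ep debugfs dumpers: each open() counts
      * the relevant idr entries under the device lock, sizes 'buf'
      * accordingly, then walks the idr again emitting one snprintf() per
      * entry.  Entries that would overflow the remaining space are dropped
      * (the "cc < space" checks below).
      */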
  89
  90/* registered cxgb4 netlink callbacks */
  91static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
  92        [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
  93        [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
  94        [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
  95        [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
  96        [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
  97        [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
  98        [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
  99};
 100
 101static int count_idrs(int id, void *p, void *data)
 102{
 103        int *countp = data;
 104
 105        *countp = *countp + 1;
 106        return 0;
 107}
 108
 109static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
 110                            loff_t *ppos)
 111{
 112        struct c4iw_debugfs_data *d = file->private_data;
 113
 114        return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
 115}
 116
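     /*
      * Work-request timing log: a ring of wr_log_size entries, where
      * wr_log_size is a power of two.  A slot is claimed lock-free with
      *
      *      idx = (atomic_inc_return(&wr_log_idx) - 1) & (wr_log_size - 1);
      *
      * so old entries are simply overwritten once the ring wraps.
      */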
 117void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
 118{
 119        struct wr_log_entry le;
 120        int idx;
 121
 122        if (!wq->rdev->wr_log)
 123                return;
 124
 125        idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
 126                (wq->rdev->wr_log_size - 1);
 127        le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
 128        getnstimeofday(&le.poll_host_ts);
 129        le.valid = 1;
 130        le.cqe_sge_ts = CQE_TS(cqe);
 131        if (SQ_TYPE(cqe)) {
 132                le.qid = wq->sq.qid;
 133                le.opcode = CQE_OPCODE(cqe);
 134                le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
 135                le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
 136                le.wr_id = CQE_WRID_SQ_IDX(cqe);
 137        } else {
 138                le.qid = wq->rq.qid;
 139                le.opcode = FW_RI_RECEIVE;
 140                le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
 141                le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
 142                le.wr_id = CQE_WRID_MSN(cqe);
 143        }
 144        wq->rdev->wr_log[idx] = le;
 145}
 146
 147static int wr_log_show(struct seq_file *seq, void *v)
 148{
 149        struct c4iw_dev *dev = seq->private;
 150        struct timespec prev_ts = {0, 0};
 151        struct wr_log_entry *lep;
 152        int prev_ts_set = 0;
 153        int idx, end;
 154
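     /*
      * Convert SGE timestamp ticks to nanoseconds; cclk_ps is assumed to be
      * the core-clock period in picoseconds, hence ticks * ps / 1000 = ns.
      */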
 155#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
 156
 157        idx = atomic_read(&dev->rdev.wr_log_idx) &
 158                (dev->rdev.wr_log_size - 1);
 159        end = idx - 1;
 160        if (end < 0)
 161                end = dev->rdev.wr_log_size - 1;
 162        lep = &dev->rdev.wr_log[idx];
 163        while (idx != end) {
 164                if (lep->valid) {
 165                        if (!prev_ts_set) {
 166                                prev_ts_set = 1;
 167                                prev_ts = lep->poll_host_ts;
 168                        }
 169                        seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
 170                                   "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
 171                                   "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
 172                                   "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
 173                                   "cqe_poll_delta_ns %llu\n",
 174                                   idx,
 175                                   timespec_sub(lep->poll_host_ts,
 176                                                prev_ts).tv_sec,
 177                                   timespec_sub(lep->poll_host_ts,
 178                                                prev_ts).tv_nsec,
 179                                   lep->qid, lep->opcode,
 180                                   lep->opcode == FW_RI_RECEIVE ?
 181                                                        "msn" : "wrid",
 182                                   lep->wr_id,
 183                                   timespec_sub(lep->poll_host_ts,
 184                                                lep->post_host_ts).tv_sec,
 185                                   timespec_sub(lep->poll_host_ts,
 186                                                lep->post_host_ts).tv_nsec,
 187                                   lep->post_sge_ts, lep->cqe_sge_ts,
 188                                   lep->poll_sge_ts,
 189                                   ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
 190                                   ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
 191                        prev_ts = lep->poll_host_ts;
 192                }
 193                idx++;
 194                if (idx > (dev->rdev.wr_log_size - 1))
 195                        idx = 0;
 196                lep = &dev->rdev.wr_log[idx];
 197        }
 198#undef ts2ns
 199        return 0;
 200}
 201
 202static int wr_log_open(struct inode *inode, struct file *file)
 203{
 204        return single_open(file, wr_log_show, inode->i_private);
 205}
 206
 207static ssize_t wr_log_clear(struct file *file, const char __user *buf,
 208                            size_t count, loff_t *pos)
 209{
 210        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 211        int i;
 212
 213        if (dev->rdev.wr_log)
 214                for (i = 0; i < dev->rdev.wr_log_size; i++)
 215                        dev->rdev.wr_log[i].valid = 0;
 216        return count;
 217}
 218
 219static const struct file_operations wr_log_debugfs_fops = {
 220        .owner   = THIS_MODULE,
 221        .open    = wr_log_open,
 222        .release = single_release,
 223        .read    = seq_read,
 224        .llseek  = seq_lseek,
 225        .write   = wr_log_clear,
 226};
 227
 228static int dump_qp(int id, void *p, void *data)
 229{
 230        struct c4iw_qp *qp = p;
 231        struct c4iw_debugfs_data *qpd = data;
 232        int space;
 233        int cc;
 234
 235        if (id != qp->wq.sq.qid)
 236                return 0;
 237
 238        space = qpd->bufsize - qpd->pos - 1;
 239        if (space == 0)
 240                return 1;
 241
 242        if (qp->ep) {
 243                if (qp->ep->com.local_addr.ss_family == AF_INET) {
 244                        struct sockaddr_in *lsin = (struct sockaddr_in *)
 245                                &qp->ep->com.local_addr;
 246                        struct sockaddr_in *rsin = (struct sockaddr_in *)
 247                                &qp->ep->com.remote_addr;
 248                        struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 249                                &qp->ep->com.mapped_local_addr;
 250                        struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
 251                                &qp->ep->com.mapped_remote_addr;
 252
 253                        cc = snprintf(qpd->buf + qpd->pos, space,
 254                                      "rc qp sq id %u rq id %u state %u "
 255                                      "onchip %u ep tid %u state %u "
 256                                      "%pI4:%u/%u->%pI4:%u/%u\n",
 257                                      qp->wq.sq.qid, qp->wq.rq.qid,
 258                                      (int)qp->attr.state,
 259                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 260                                      qp->ep->hwtid, (int)qp->ep->com.state,
 261                                      &lsin->sin_addr, ntohs(lsin->sin_port),
 262                                      ntohs(mapped_lsin->sin_port),
 263                                      &rsin->sin_addr, ntohs(rsin->sin_port),
 264                                      ntohs(mapped_rsin->sin_port));
 265                } else {
 266                        struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 267                                &qp->ep->com.local_addr;
 268                        struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 269                                &qp->ep->com.remote_addr;
 270                        struct sockaddr_in6 *mapped_lsin6 =
 271                                (struct sockaddr_in6 *)
 272                                &qp->ep->com.mapped_local_addr;
 273                        struct sockaddr_in6 *mapped_rsin6 =
 274                                (struct sockaddr_in6 *)
 275                                &qp->ep->com.mapped_remote_addr;
 276
 277                        cc = snprintf(qpd->buf + qpd->pos, space,
 278                                      "rc qp sq id %u rq id %u state %u "
 279                                      "onchip %u ep tid %u state %u "
 280                                      "%pI6:%u/%u->%pI6:%u/%u\n",
 281                                      qp->wq.sq.qid, qp->wq.rq.qid,
 282                                      (int)qp->attr.state,
 283                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 284                                      qp->ep->hwtid, (int)qp->ep->com.state,
 285                                      &lsin6->sin6_addr,
 286                                      ntohs(lsin6->sin6_port),
 287                                      ntohs(mapped_lsin6->sin6_port),
 288                                      &rsin6->sin6_addr,
 289                                      ntohs(rsin6->sin6_port),
 290                                      ntohs(mapped_rsin6->sin6_port));
 291                }
 292        } else
 293                cc = snprintf(qpd->buf + qpd->pos, space,
 294                             "qp sq id %u rq id %u state %u onchip %u\n",
 295                              qp->wq.sq.qid, qp->wq.rq.qid,
 296                              (int)qp->attr.state,
 297                              qp->wq.sq.flags & T4_SQ_ONCHIP);
 298        if (cc < space)
 299                qpd->pos += cc;
 300        return 0;
 301}
 302
 303static int qp_release(struct inode *inode, struct file *file)
 304{
 305        struct c4iw_debugfs_data *qpd = file->private_data;
 306        if (!qpd) {
 307                printk(KERN_INFO "%s null qpd?\n", __func__);
 308                return 0;
 309        }
 310        vfree(qpd->buf);
 311        kfree(qpd);
 312        return 0;
 313}
 314
 315static int qp_open(struct inode *inode, struct file *file)
 316{
 317        struct c4iw_debugfs_data *qpd;
 318        int count = 1;
 319
 320        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
 321        if (!qpd)
 322                return -ENOMEM;
 323
 324        qpd->devp = inode->i_private;
 325        qpd->pos = 0;
 326
 327        spin_lock_irq(&qpd->devp->lock);
 328        idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
 329        spin_unlock_irq(&qpd->devp->lock);
 330
 331        qpd->bufsize = count * 128;
 332        qpd->buf = vmalloc(qpd->bufsize);
 333        if (!qpd->buf) {
 334                kfree(qpd);
 335                return -ENOMEM;
 336        }
 337
 338        spin_lock_irq(&qpd->devp->lock);
 339        idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
 340        spin_unlock_irq(&qpd->devp->lock);
 341
 342        qpd->buf[qpd->pos++] = 0;
 343        file->private_data = qpd;
 344        return 0;
 345}
 346
 347static const struct file_operations qp_debugfs_fops = {
 348        .owner   = THIS_MODULE,
 349        .open    = qp_open,
 350        .release = qp_release,
 351        .read    = debugfs_read,
 352        .llseek  = default_llseek,
 353};
 354
 355static int dump_stag(int id, void *p, void *data)
 356{
 357        struct c4iw_debugfs_data *stagd = data;
 358        int space;
 359        int cc;
 360        struct fw_ri_tpte tpte;
 361        int ret;
 362
 363        space = stagd->bufsize - stagd->pos - 1;
 364        if (space == 0)
 365                return 1;
 366
 367        ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8,
 368                              (__be32 *)&tpte);
 369        if (ret) {
 370                dev_err(&stagd->devp->rdev.lldi.pdev->dev,
 371                        "%s cxgb4_read_tpte err %d\n", __func__, ret);
 372                return ret;
 373        }
 374        cc = snprintf(stagd->buf + stagd->pos, space,
 375                      "stag: idx 0x%x valid %d key 0x%x state %d pdid %d "
 376                      "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
 377                      (u32)id<<8,
 378                      FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
 379                      FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
 380                      FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
 381                      FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
 382                      FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
 383                      FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
 384                      ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
 385                      ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
 386        if (cc < space)
 387                stagd->pos += cc;
 388        return 0;
 389}
 390
 391static int stag_release(struct inode *inode, struct file *file)
 392{
 393        struct c4iw_debugfs_data *stagd = file->private_data;
 394        if (!stagd) {
 395                printk(KERN_INFO "%s null stagd?\n", __func__);
 396                return 0;
 397        }
 398        vfree(stagd->buf);
 399        kfree(stagd);
 400        return 0;
 401}
 402
 403static int stag_open(struct inode *inode, struct file *file)
 404{
 405        struct c4iw_debugfs_data *stagd;
 406        int ret = 0;
 407        int count = 1;
 408
 409        stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
 410        if (!stagd) {
 411                ret = -ENOMEM;
 412                goto out;
 413        }
 414        stagd->devp = inode->i_private;
 415        stagd->pos = 0;
 416
 417        spin_lock_irq(&stagd->devp->lock);
 418        idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
 419        spin_unlock_irq(&stagd->devp->lock);
 420
 421        stagd->bufsize = count * 256;
 422        stagd->buf = vmalloc(stagd->bufsize);
 423        if (!stagd->buf) {
 424                ret = -ENOMEM;
 425                goto err1;
 426        }
 427
 428        spin_lock_irq(&stagd->devp->lock);
 429        idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
 430        spin_unlock_irq(&stagd->devp->lock);
 431
 432        stagd->buf[stagd->pos++] = 0;
 433        file->private_data = stagd;
 434        goto out;
 435err1:
 436        kfree(stagd);
 437out:
 438        return ret;
 439}
 440
 441static const struct file_operations stag_debugfs_fops = {
 442        .owner   = THIS_MODULE,
 443        .open    = stag_open,
 444        .release = stag_release,
 445        .read    = debugfs_read,
 446        .llseek  = default_llseek,
 447};
 448
 449static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
 450
 451static int stats_show(struct seq_file *seq, void *v)
 452{
 453        struct c4iw_dev *dev = seq->private;
 454
 455        seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
 456                   "Max", "Fail");
 457        seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
 458                        dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
 459                        dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
 460        seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 461                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 462                        dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
 463        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 464                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 465                        dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
 466        seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
 467                        dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
 468                        dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
 469        seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
 470                        dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
 471                        dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
 472        seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
 473                        dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
 474                        dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
 475        seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
 476        seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
 477        seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
 478        seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
 479                   db_state_str[dev->db_state],
 480                   dev->rdev.stats.db_state_transitions,
 481                   dev->rdev.stats.db_fc_interruptions);
 482        seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
 483        seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
 484                   dev->rdev.stats.act_ofld_conn_fails);
 485        seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
 486                   dev->rdev.stats.pas_ofld_conn_fails);
 487        seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
 488        seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
 489        return 0;
 490}
 491
 492static int stats_open(struct inode *inode, struct file *file)
 493{
 494        return single_open(file, stats_show, inode->i_private);
 495}
 496
 497static ssize_t stats_clear(struct file *file, const char __user *buf,
 498                size_t count, loff_t *pos)
 499{
 500        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 501
 502        mutex_lock(&dev->rdev.stats.lock);
 503        dev->rdev.stats.pd.max = 0;
 504        dev->rdev.stats.pd.fail = 0;
 505        dev->rdev.stats.qid.max = 0;
 506        dev->rdev.stats.qid.fail = 0;
 507        dev->rdev.stats.stag.max = 0;
 508        dev->rdev.stats.stag.fail = 0;
 509        dev->rdev.stats.pbl.max = 0;
 510        dev->rdev.stats.pbl.fail = 0;
 511        dev->rdev.stats.rqt.max = 0;
 512        dev->rdev.stats.rqt.fail = 0;
 513        dev->rdev.stats.ocqp.max = 0;
 514        dev->rdev.stats.ocqp.fail = 0;
 515        dev->rdev.stats.db_full = 0;
 516        dev->rdev.stats.db_empty = 0;
 517        dev->rdev.stats.db_drop = 0;
 518        dev->rdev.stats.db_state_transitions = 0;
 519        dev->rdev.stats.tcam_full = 0;
 520        dev->rdev.stats.act_ofld_conn_fails = 0;
 521        dev->rdev.stats.pas_ofld_conn_fails = 0;
 522        mutex_unlock(&dev->rdev.stats.lock);
 523        return count;
 524}
 525
 526static const struct file_operations stats_debugfs_fops = {
 527        .owner   = THIS_MODULE,
 528        .open    = stats_open,
 529        .release = single_release,
 530        .read    = seq_read,
 531        .llseek  = seq_lseek,
 532        .write   = stats_clear,
 533};
 534
 535static int dump_ep(int id, void *p, void *data)
 536{
 537        struct c4iw_ep *ep = p;
 538        struct c4iw_debugfs_data *epd = data;
 539        int space;
 540        int cc;
 541
 542        space = epd->bufsize - epd->pos - 1;
 543        if (space == 0)
 544                return 1;
 545
 546        if (ep->com.local_addr.ss_family == AF_INET) {
 547                struct sockaddr_in *lsin = (struct sockaddr_in *)
 548                        &ep->com.local_addr;
 549                struct sockaddr_in *rsin = (struct sockaddr_in *)
 550                        &ep->com.remote_addr;
 551                struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 552                        &ep->com.mapped_local_addr;
 553                struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
 554                        &ep->com.mapped_remote_addr;
 555
 556                cc = snprintf(epd->buf + epd->pos, space,
 557                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 558                              "history 0x%lx hwtid %d atid %d "
 559                              "conn_na %u abort_na %u "
 560                              "%pI4:%d/%d <-> %pI4:%d/%d\n",
 561                              ep, ep->com.cm_id, ep->com.qp,
 562                              (int)ep->com.state, ep->com.flags,
 563                              ep->com.history, ep->hwtid, ep->atid,
 564                              ep->stats.connect_neg_adv,
 565                              ep->stats.abort_neg_adv,
 566                              &lsin->sin_addr, ntohs(lsin->sin_port),
 567                              ntohs(mapped_lsin->sin_port),
 568                              &rsin->sin_addr, ntohs(rsin->sin_port),
 569                              ntohs(mapped_rsin->sin_port));
 570        } else {
 571                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 572                        &ep->com.local_addr;
 573                struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 574                        &ep->com.remote_addr;
 575                struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
 576                        &ep->com.mapped_local_addr;
 577                struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
 578                        &ep->com.mapped_remote_addr;
 579
 580                cc = snprintf(epd->buf + epd->pos, space,
 581                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 582                              "history 0x%lx hwtid %d atid %d "
 583                              "conn_na %u abort_na %u "
 584                              "%pI6:%d/%d <-> %pI6:%d/%d\n",
 585                              ep, ep->com.cm_id, ep->com.qp,
 586                              (int)ep->com.state, ep->com.flags,
 587                              ep->com.history, ep->hwtid, ep->atid,
 588                              ep->stats.connect_neg_adv,
 589                              ep->stats.abort_neg_adv,
 590                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 591                              ntohs(mapped_lsin6->sin6_port),
 592                              &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
 593                              ntohs(mapped_rsin6->sin6_port));
 594        }
 595        if (cc < space)
 596                epd->pos += cc;
 597        return 0;
 598}
 599
 600static int dump_listen_ep(int id, void *p, void *data)
 601{
 602        struct c4iw_listen_ep *ep = p;
 603        struct c4iw_debugfs_data *epd = data;
 604        int space;
 605        int cc;
 606
 607        space = epd->bufsize - epd->pos - 1;
 608        if (space == 0)
 609                return 1;
 610
 611        if (ep->com.local_addr.ss_family == AF_INET) {
 612                struct sockaddr_in *lsin = (struct sockaddr_in *)
 613                        &ep->com.local_addr;
 614                struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 615                        &ep->com.mapped_local_addr;
 616
 617                cc = snprintf(epd->buf + epd->pos, space,
 618                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 619                              "backlog %d %pI4:%d/%d\n",
 620                              ep, ep->com.cm_id, (int)ep->com.state,
 621                              ep->com.flags, ep->stid, ep->backlog,
 622                              &lsin->sin_addr, ntohs(lsin->sin_port),
 623                              ntohs(mapped_lsin->sin_port));
 624        } else {
 625                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 626                        &ep->com.local_addr;
 627                struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
 628                        &ep->com.mapped_local_addr;
 629
 630                cc = snprintf(epd->buf + epd->pos, space,
 631                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 632                              "backlog %d %pI6:%d/%d\n",
 633                              ep, ep->com.cm_id, (int)ep->com.state,
 634                              ep->com.flags, ep->stid, ep->backlog,
 635                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 636                              ntohs(mapped_lsin6->sin6_port));
 637        }
 638        if (cc < space)
 639                epd->pos += cc;
 640        return 0;
 641}
 642
 643static int ep_release(struct inode *inode, struct file *file)
 644{
 645        struct c4iw_debugfs_data *epd = file->private_data;
 646        if (!epd) {
  647                pr_info("%s null epd?\n", __func__);
 648                return 0;
 649        }
 650        vfree(epd->buf);
 651        kfree(epd);
 652        return 0;
 653}
 654
 655static int ep_open(struct inode *inode, struct file *file)
 656{
 657        struct c4iw_debugfs_data *epd;
 658        int ret = 0;
 659        int count = 1;
 660
 661        epd = kmalloc(sizeof(*epd), GFP_KERNEL);
 662        if (!epd) {
 663                ret = -ENOMEM;
 664                goto out;
 665        }
 666        epd->devp = inode->i_private;
 667        epd->pos = 0;
 668
 669        spin_lock_irq(&epd->devp->lock);
 670        idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
 671        idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
 672        idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 673        spin_unlock_irq(&epd->devp->lock);
 674
 675        epd->bufsize = count * 240;
 676        epd->buf = vmalloc(epd->bufsize);
 677        if (!epd->buf) {
 678                ret = -ENOMEM;
 679                goto err1;
 680        }
 681
 682        spin_lock_irq(&epd->devp->lock);
 683        idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
 684        idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
 685        idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
 686        spin_unlock_irq(&epd->devp->lock);
 687
 688        file->private_data = epd;
 689        goto out;
 690err1:
 691        kfree(epd);
 692out:
 693        return ret;
 694}
 695
 696static const struct file_operations ep_debugfs_fops = {
 697        .owner   = THIS_MODULE,
 698        .open    = ep_open,
 699        .release = ep_release,
 700        .read    = debugfs_read,
 701};
 702
 703static int setup_debugfs(struct c4iw_dev *devp)
 704{
 705        if (!devp->debugfs_root)
 706                return -1;
 707
 708        debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
 709                                 (void *)devp, &qp_debugfs_fops, 4096);
 710
 711        debugfs_create_file_size("stags", S_IWUSR, devp->debugfs_root,
 712                                 (void *)devp, &stag_debugfs_fops, 4096);
 713
 714        debugfs_create_file_size("stats", S_IWUSR, devp->debugfs_root,
 715                                 (void *)devp, &stats_debugfs_fops, 4096);
 716
 717        debugfs_create_file_size("eps", S_IWUSR, devp->debugfs_root,
 718                                 (void *)devp, &ep_debugfs_fops, 4096);
 719
 720        if (c4iw_wr_log)
 721                debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
 722                                         (void *)devp, &wr_log_debugfs_fops, 4096);
 723        return 0;
 724}
 725
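     /*
      * Note: qids appear to be handed out to user contexts in blocks of
      * (qpmask + 1), one block per user doorbell page, so only the first
      * qid of a block (qid & qpmask == 0) returns the block to the
      * qid_table and adjusts the usage counters.
      */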
 726void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 727                               struct c4iw_dev_ucontext *uctx)
 728{
 729        struct list_head *pos, *nxt;
 730        struct c4iw_qid_list *entry;
 731
 732        mutex_lock(&uctx->lock);
 733        list_for_each_safe(pos, nxt, &uctx->qpids) {
 734                entry = list_entry(pos, struct c4iw_qid_list, entry);
 735                list_del_init(&entry->entry);
 736                if (!(entry->qid & rdev->qpmask)) {
 737                        c4iw_put_resource(&rdev->resource.qid_table,
 738                                          entry->qid);
 739                        mutex_lock(&rdev->stats.lock);
 740                        rdev->stats.qid.cur -= rdev->qpmask + 1;
 741                        mutex_unlock(&rdev->stats.lock);
 742                }
 743                kfree(entry);
 744        }
 745
  746        list_for_each_safe(pos, nxt, &uctx->cqids) {
 747                entry = list_entry(pos, struct c4iw_qid_list, entry);
 748                list_del_init(&entry->entry);
 749                kfree(entry);
 750        }
 751        mutex_unlock(&uctx->lock);
 752}
 753
 754void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 755                            struct c4iw_dev_ucontext *uctx)
 756{
 757        INIT_LIST_HEAD(&uctx->qpids);
 758        INIT_LIST_HEAD(&uctx->cqids);
 759        mutex_init(&uctx->lock);
 760}
 761
 762/* Caller takes care of locking if needed */
 763static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 764{
 765        int err;
 766
 767        c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 768
 769        /*
 770         * This implementation assumes udb_density == ucq_density!  Eventually
 771         * we might need to support this but for now fail the open. Also the
 772         * cqid and qpid range must match for now.
 773         */
 774        if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
 775                pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
 776                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
 777                       rdev->lldi.ucq_density);
 778                return -EINVAL;
 779        }
 780        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
 781            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
 782                pr_err(MOD "%s: unsupported qp and cq id ranges "
 783                       "qp start %u size %u cq start %u size %u\n",
 784                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
  785                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
 786                       rdev->lldi.vr->cq.size);
 787                return -EINVAL;
 788        }
 789
 790        rdev->qpmask = rdev->lldi.udb_density - 1;
 791        rdev->cqmask = rdev->lldi.ucq_density - 1;
 792        PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
 793             "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
 794             "qp qid start %u size %u cq qid start %u size %u\n",
 795             __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 796             rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 797             rdev->lldi.vr->pbl.start,
 798             rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
 799             rdev->lldi.vr->rq.size,
 800             rdev->lldi.vr->qp.start,
 801             rdev->lldi.vr->qp.size,
 802             rdev->lldi.vr->cq.start,
 803             rdev->lldi.vr->cq.size);
 804        PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p "
 805             "qpmask 0x%x cqmask 0x%x\n",
 806             (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
 807             (void *)pci_resource_start(rdev->lldi.pdev, 2),
 808             rdev->lldi.db_reg, rdev->lldi.gts_reg,
 809             rdev->qpmask, rdev->cqmask);
 810
 811        if (c4iw_num_stags(rdev) == 0)
 812                return -EINVAL;
 813
 814        rdev->stats.pd.total = T4_MAX_NUM_PD;
 815        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 816        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 817        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
 818        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 819        rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 820
 821        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
 822        if (err) {
 823                printk(KERN_ERR MOD "error %d initializing resources\n", err);
 824                return err;
 825        }
 826        err = c4iw_pblpool_create(rdev);
 827        if (err) {
 828                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
 829                goto destroy_resource;
 830        }
 831        err = c4iw_rqtpool_create(rdev);
 832        if (err) {
 833                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
 834                goto destroy_pblpool;
 835        }
 836        err = c4iw_ocqp_pool_create(rdev);
 837        if (err) {
 838                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
 839                goto destroy_rqtpool;
 840        }
 841        rdev->status_page = (struct t4_dev_status_page *)
 842                            __get_free_page(GFP_KERNEL);
  843        if (!rdev->status_page) {
  844                err = -ENOMEM;
  845                goto destroy_ocqp_pool;
  846        }
 845        rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
 846        rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
 847        rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
 848        rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
 849
 850        if (c4iw_wr_log) {
 851                rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
 852                                       sizeof(*rdev->wr_log), GFP_KERNEL);
 853                if (rdev->wr_log) {
 854                        rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
 855                        atomic_set(&rdev->wr_log_idx, 0);
 856                } else {
 857                        pr_err(MOD "error allocating wr_log. Logging disabled\n");
 858                }
 859        }
 860
 861        rdev->status_page->db_off = 0;
 862
 863        return 0;
 864destroy_ocqp_pool:
 865        c4iw_ocqp_pool_destroy(rdev);
 866destroy_rqtpool:
 867        c4iw_rqtpool_destroy(rdev);
 868destroy_pblpool:
 869        c4iw_pblpool_destroy(rdev);
 870destroy_resource:
 871        c4iw_destroy_resource(&rdev->resource);
 872        return err;
 873}
 874
 875static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 876{
 877        kfree(rdev->wr_log);
 878        free_page((unsigned long)rdev->status_page);
 879        c4iw_pblpool_destroy(rdev);
 880        c4iw_rqtpool_destroy(rdev);
 881        c4iw_destroy_resource(&rdev->resource);
 882}
 883
 884static void c4iw_dealloc(struct uld_ctx *ctx)
 885{
 886        c4iw_rdev_close(&ctx->dev->rdev);
 887        idr_destroy(&ctx->dev->cqidr);
 888        idr_destroy(&ctx->dev->qpidr);
 889        idr_destroy(&ctx->dev->mmidr);
 890        idr_destroy(&ctx->dev->hwtid_idr);
 891        idr_destroy(&ctx->dev->stid_idr);
 892        idr_destroy(&ctx->dev->atid_idr);
 893        if (ctx->dev->rdev.bar2_kva)
 894                iounmap(ctx->dev->rdev.bar2_kva);
 895        if (ctx->dev->rdev.oc_mw_kva)
 896                iounmap(ctx->dev->rdev.oc_mw_kva);
 897        ib_dealloc_device(&ctx->dev->ibdev);
 898        ctx->dev = NULL;
 899}
 900
 901static void c4iw_remove(struct uld_ctx *ctx)
 902{
 903        PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
 904        c4iw_unregister_device(ctx->dev);
 905        c4iw_dealloc(ctx);
 906}
 907
 908static int rdma_supported(const struct cxgb4_lld_info *infop)
 909{
 910        return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
 911               infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
 912               infop->vr->cq.size > 0;
 913}
 914
 915static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 916{
 917        struct c4iw_dev *devp;
 918        int ret;
 919
 920        if (!rdma_supported(infop)) {
 921                printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
 922                       pci_name(infop->pdev));
 923                return ERR_PTR(-ENOSYS);
 924        }
 925        if (!ocqp_supported(infop))
 926                pr_info("%s: On-Chip Queues not supported on this device.\n",
 927                        pci_name(infop->pdev));
 928
 929        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 930        if (!devp) {
 931                printk(KERN_ERR MOD "Cannot allocate ib device\n");
 932                return ERR_PTR(-ENOMEM);
 933        }
 934        devp->rdev.lldi = *infop;
 935
 936        /* init various hw-queue params based on lld info */
 937        PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
 938             __func__, devp->rdev.lldi.sge_ingpadboundary,
 939             devp->rdev.lldi.sge_egrstatuspagesize);
 940
 941        devp->rdev.hw_queue.t4_eq_status_entries =
 942                devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
 943        devp->rdev.hw_queue.t4_max_eq_size = 65520;
 944        devp->rdev.hw_queue.t4_max_iq_size = 65520;
 945        devp->rdev.hw_queue.t4_max_rq_size = 8192 -
 946                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 947        devp->rdev.hw_queue.t4_max_sq_size =
 948                devp->rdev.hw_queue.t4_max_eq_size -
 949                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 950        devp->rdev.hw_queue.t4_max_qp_depth =
 951                devp->rdev.hw_queue.t4_max_rq_size;
 952        devp->rdev.hw_queue.t4_max_cq_depth =
 953                devp->rdev.hw_queue.t4_max_iq_size - 2;
 954        devp->rdev.hw_queue.t4_stat_len =
 955                devp->rdev.lldi.sge_egrstatuspagesize;
 956
 957        /*
 958         * For T5/T6 devices, we map all of BAR2 with WC.
 959         * For T4 devices with onchip qp mem, we map only that part
 960         * of BAR2 with WC.
 961         */
 962        devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
 963        if (!is_t4(devp->rdev.lldi.adapter_type)) {
 964                devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
 965                        pci_resource_len(devp->rdev.lldi.pdev, 2));
 966                if (!devp->rdev.bar2_kva) {
 967                        pr_err(MOD "Unable to ioremap BAR2\n");
 968                        ib_dealloc_device(&devp->ibdev);
 969                        return ERR_PTR(-EINVAL);
 970                }
 971        } else if (ocqp_supported(infop)) {
 972                devp->rdev.oc_mw_pa =
 973                        pci_resource_start(devp->rdev.lldi.pdev, 2) +
 974                        pci_resource_len(devp->rdev.lldi.pdev, 2) -
 975                        roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
 976                devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
 977                        devp->rdev.lldi.vr->ocq.size);
 978                if (!devp->rdev.oc_mw_kva) {
 979                        pr_err(MOD "Unable to ioremap onchip mem\n");
 980                        ib_dealloc_device(&devp->ibdev);
 981                        return ERR_PTR(-EINVAL);
 982                }
 983        }
 984
 985        PDBG(KERN_INFO MOD "ocq memory: "
 986               "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
 987               devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
 988               devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
 989
 990        ret = c4iw_rdev_open(&devp->rdev);
 991        if (ret) {
 992                printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
 993                ib_dealloc_device(&devp->ibdev);
 994                return ERR_PTR(ret);
 995        }
 996
 997        idr_init(&devp->cqidr);
 998        idr_init(&devp->qpidr);
 999        idr_init(&devp->mmidr);
1000        idr_init(&devp->hwtid_idr);
1001        idr_init(&devp->stid_idr);
1002        idr_init(&devp->atid_idr);
1003        spin_lock_init(&devp->lock);
1004        mutex_init(&devp->rdev.stats.lock);
1005        mutex_init(&devp->db_mutex);
1006        INIT_LIST_HEAD(&devp->db_fc_list);
1007        devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
1008
1009        if (c4iw_debugfs_root) {
1010                devp->debugfs_root = debugfs_create_dir(
1011                                        pci_name(devp->rdev.lldi.pdev),
1012                                        c4iw_debugfs_root);
1013                setup_debugfs(devp);
1014        }
 1015
1017        return devp;
1018}
1019
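     /*
      * ULD entry point: cxgb4 calls this once per adapter at attach time
      * with its lld_info.  Only the uld_ctx is created here; the c4iw_dev
      * itself is allocated and registered later, from the CXGB4_STATE_UP
      * state-change callback below.
      */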
1020static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
1021{
1022        struct uld_ctx *ctx;
1023        static int vers_printed;
1024        int i;
1025
1026        if (!vers_printed++)
1027                pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
1028                        DRV_VERSION);
1029
1030        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
1031        if (!ctx) {
1032                ctx = ERR_PTR(-ENOMEM);
1033                goto out;
1034        }
1035        ctx->lldi = *infop;
1036
1037        PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
1038             __func__, pci_name(ctx->lldi.pdev),
1039             ctx->lldi.nchan, ctx->lldi.nrxq,
1040             ctx->lldi.ntxq, ctx->lldi.nports);
1041
1042        mutex_lock(&dev_mutex);
1043        list_add_tail(&ctx->entry, &uld_ctx_list);
1044        mutex_unlock(&dev_mutex);
1045
1046        for (i = 0; i < ctx->lldi.nrxq; i++)
1047                PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
1048out:
1049        return ctx;
1050}
1051
1052static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
1053                                                 const __be64 *rsp,
1054                                                 u32 pktshift)
1055{
1056        struct sk_buff *skb;
1057
1058        /*
1059         * Allocate space for cpl_pass_accept_req which will be synthesized by
1060         * driver. Once the driver synthesizes the request the skb will go
1061         * through the regular cpl_pass_accept_req processing.
1062         * The math here assumes sizeof cpl_pass_accept_req >= sizeof
1063         * cpl_rx_pkt.
1064         */
1065        skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1066                        sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
1067        if (unlikely(!skb))
1068                return NULL;
1069
1070         __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1071                   sizeof(struct rss_header) - pktshift);
1072
1073        /*
1074         * This skb will contain:
1075         *   rss_header from the rspq descriptor (1 flit)
1076         *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
1077         *   space for the difference between the size of an
1078         *      rx_pkt and pass_accept_req cpl (1 flit)
1079         *   the packet data from the gl
1080         */
1081        skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
1082                                sizeof(struct rss_header));
1083        skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
1084                                       sizeof(struct cpl_pass_accept_req),
1085                                       gl->va + pktshift,
1086                                       gl->tot_len - pktshift);
1087        return skb;
1088}
1089
1090static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
1091                           const __be64 *rsp)
1092{
1093        unsigned int opcode = *(u8 *)rsp;
1094        struct sk_buff *skb;
1095
1096        if (opcode != CPL_RX_PKT)
1097                goto out;
1098
 1099        skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
1100        if (skb == NULL)
1101                goto out;
1102
1103        if (c4iw_handlers[opcode] == NULL) {
1104                pr_info("%s no handler opcode 0x%x...\n", __func__,
1105                       opcode);
1106                kfree_skb(skb);
1107                goto out;
1108        }
1109        c4iw_handlers[opcode](dev, skb);
1110        return 1;
1111out:
1112        return 0;
1113}
1114
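     /*
      * Ingress handler for the RDMA RX queues.  Three delivery forms are
      * handled: a response with no gather list (payload inline in the
      * 64-byte descriptor), CXGB4_MSG_AN (an async notification carrying a
      * CQ qid), and a normal gather list.  A gather list whose first byte
      * does not match the CPL opcode in the response is unexpected, except
      * for CPL_RX_PKT, which recv_rx_pkt() above turns into a synthesized
      * pass_accept_req.
      */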
1115static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
1116                        const struct pkt_gl *gl)
1117{
1118        struct uld_ctx *ctx = handle;
1119        struct c4iw_dev *dev = ctx->dev;
1120        struct sk_buff *skb;
1121        u8 opcode;
1122
1123        if (gl == NULL) {
1124                /* omit RSS and rsp_ctrl at end of descriptor */
1125                unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
1126
1127                skb = alloc_skb(256, GFP_ATOMIC);
1128                if (!skb)
1129                        goto nomem;
1130                __skb_put(skb, len);
1131                skb_copy_to_linear_data(skb, &rsp[1], len);
1132        } else if (gl == CXGB4_MSG_AN) {
1133                const struct rsp_ctrl *rc = (void *)rsp;
1134
1135                u32 qid = be32_to_cpu(rc->pldbuflen_qid);
1136                c4iw_ev_handler(dev, qid);
1137                return 0;
1138        } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
1139                if (recv_rx_pkt(dev, gl, rsp))
1140                        return 0;
1141
1142                pr_info("%s: unexpected FL contents at %p, " \
1143                       "RSS %#llx, FL %#llx, len %u\n",
1144                       pci_name(ctx->lldi.pdev), gl->va,
1145                       (unsigned long long)be64_to_cpu(*rsp),
1146                       (unsigned long long)be64_to_cpu(
1147                       *(__force __be64 *)gl->va),
1148                       gl->tot_len);
1149
1150                return 0;
1151        } else {
1152                skb = cxgb4_pktgl_to_skb(gl, 128, 128);
1153                if (unlikely(!skb))
1154                        goto nomem;
1155        }
1156
1157        opcode = *(u8 *)rsp;
1158        if (c4iw_handlers[opcode]) {
1159                c4iw_handlers[opcode](dev, skb);
1160        } else {
1161                pr_info("%s no handler opcode 0x%x...\n", __func__,
1162                       opcode);
1163                kfree_skb(skb);
1164        }
1165
1166        return 0;
1167nomem:
1168        return -1;
1169}
1170
1171static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
1172{
1173        struct uld_ctx *ctx = handle;
1174
1175        PDBG("%s new_state %u\n", __func__, new_state);
1176        switch (new_state) {
1177        case CXGB4_STATE_UP:
1178                printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
1179                if (!ctx->dev) {
1180                        int ret;
1181
1182                        ctx->dev = c4iw_alloc(&ctx->lldi);
1183                        if (IS_ERR(ctx->dev)) {
1184                                printk(KERN_ERR MOD
1185                                       "%s: initialization failed: %ld\n",
1186                                       pci_name(ctx->lldi.pdev),
1187                                       PTR_ERR(ctx->dev));
1188                                ctx->dev = NULL;
1189                                break;
1190                        }
1191                        ret = c4iw_register_device(ctx->dev);
1192                        if (ret) {
1193                                printk(KERN_ERR MOD
1194                                       "%s: RDMA registration failed: %d\n",
1195                                       pci_name(ctx->lldi.pdev), ret);
1196                                c4iw_dealloc(ctx);
1197                        }
1198                }
1199                break;
1200        case CXGB4_STATE_DOWN:
1201                printk(KERN_INFO MOD "%s: Down\n",
1202                       pci_name(ctx->lldi.pdev));
1203                if (ctx->dev)
1204                        c4iw_remove(ctx);
1205                break;
1206        case CXGB4_STATE_START_RECOVERY:
1207                printk(KERN_INFO MOD "%s: Fatal Error\n",
1208                       pci_name(ctx->lldi.pdev));
1209                if (ctx->dev) {
1210                        struct ib_event event;
1211
1212                        ctx->dev->rdev.flags |= T4_FATAL_ERROR;
1213                        memset(&event, 0, sizeof event);
1214                        event.event  = IB_EVENT_DEVICE_FATAL;
1215                        event.device = &ctx->dev->ibdev;
1216                        ib_dispatch_event(&event);
1217                        c4iw_remove(ctx);
1218                }
1219                break;
1220        case CXGB4_STATE_DETACH:
1221                printk(KERN_INFO MOD "%s: Detach\n",
1222                       pci_name(ctx->lldi.pdev));
1223                if (ctx->dev)
1224                        c4iw_remove(ctx);
1225                break;
1226        }
1227        return 0;
1228}
1229
1230static int disable_qp_db(int id, void *p, void *data)
1231{
1232        struct c4iw_qp *qp = p;
1233
1234        t4_disable_wq_db(&qp->wq);
1235        return 0;
1236}
1237
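     /*
      * Doorbell flow control / recovery state machine (see db_state_str):
      * stop_queues() is expected to run when the hardware reports a full
      * doorbell FIFO.  If the adapter cannot expose the status page to user
      * mode (T4_STATUS_PAGE_DISABLED) the kernel disables each QP's
      * doorbells directly; otherwise user processes are told to stop
      * ringing via status_page->db_off.
      */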
1238static void stop_queues(struct uld_ctx *ctx)
1239{
1240        unsigned long flags;
1241
1242        spin_lock_irqsave(&ctx->dev->lock, flags);
1243        ctx->dev->rdev.stats.db_state_transitions++;
1244        ctx->dev->db_state = STOPPED;
1245        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
1246                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
1247        else
1248                ctx->dev->rdev.status_page->db_off = 1;
1249        spin_unlock_irqrestore(&ctx->dev->lock, flags);
1250}
1251
1252static int enable_qp_db(int id, void *p, void *data)
1253{
1254        struct c4iw_qp *qp = p;
1255
1256        t4_enable_wq_db(&qp->wq);
1257        return 0;
1258}
1259
1260static void resume_rc_qp(struct c4iw_qp *qp)
1261{
1262        spin_lock(&qp->lock);
1263        t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
1264        qp->wq.sq.wq_pidx_inc = 0;
1265        t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
1266        qp->wq.rq.wq_pidx_inc = 0;
1267        spin_unlock(&qp->lock);
1268}
1269
1270static void resume_a_chunk(struct uld_ctx *ctx)
1271{
1272        int i;
1273        struct c4iw_qp *qp;
1274
1275        for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
1276                qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
1277                                      db_fc_entry);
1278                list_del_init(&qp->db_fc_entry);
1279                resume_rc_qp(qp);
1280                if (list_empty(&ctx->dev->db_fc_list))
1281                        break;
1282        }
1283}
1284
1285static void resume_queues(struct uld_ctx *ctx)
1286{
1287        spin_lock_irq(&ctx->dev->lock);
1288        if (ctx->dev->db_state != STOPPED)
1289                goto out;
1290        ctx->dev->db_state = FLOW_CONTROL;
1291        while (1) {
1292                if (list_empty(&ctx->dev->db_fc_list)) {
1293                        WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
1294                        ctx->dev->db_state = NORMAL;
1295                        ctx->dev->rdev.stats.db_state_transitions++;
1296                        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
1297                                idr_for_each(&ctx->dev->qpidr, enable_qp_db,
1298                                             NULL);
1299                        } else {
1300                                ctx->dev->rdev.status_page->db_off = 0;
1301                        }
1302                        break;
1303                } else {
1304                        if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
1305                            < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
1306                               DB_FC_DRAIN_THRESH)) {
1307                                resume_a_chunk(ctx);
1308                        }
1309                        if (!list_empty(&ctx->dev->db_fc_list)) {
1310                                spin_unlock_irq(&ctx->dev->lock);
1311                                if (DB_FC_RESUME_DELAY) {
1312                                        set_current_state(TASK_UNINTERRUPTIBLE);
1313                                        schedule_timeout(DB_FC_RESUME_DELAY);
1314                                }
1315                                spin_lock_irq(&ctx->dev->lock);
1316                                if (ctx->dev->db_state != FLOW_CONTROL)
1317                                        break;
1318                        }
1319                }
1320        }
1321out:
1322        if (ctx->dev->db_state != NORMAL)
1323                ctx->dev->rdev.stats.db_fc_interruptions++;
1324        spin_unlock_irq(&ctx->dev->lock);
1325}
1326
1327struct qp_list {
1328        unsigned idx;
1329        struct c4iw_qp **qps;
1330};
1331
1332static int add_and_ref_qp(int id, void *p, void *data)
1333{
1334        struct qp_list *qp_listp = data;
1335        struct c4iw_qp *qp = p;
1336
1337        c4iw_qp_add_ref(&qp->ibqp);
1338        qp_listp->qps[qp_listp->idx++] = qp;
1339        return 0;
1340}
1341
1342static int count_qps(int id, void *p, void *data)
1343{
1344        unsigned *countp = data;
1345        (*countp)++;
1346        return 0;
1347}
1348
1349static void deref_qps(struct qp_list *qp_list)
1350{
1351        int idx;
1352
1353        for (idx = 0; idx < qp_list->idx; idx++)
1354                c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
1355}
1356
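     /*
      * Doorbell drop recovery: for every active QP, resync the hardware
      * producer index with the host copy via cxgb4_sync_txq_pidx() for both
      * the SQ and RQ, then wait for the doorbell FIFO to drain before
      * moving on to the next QP.
      */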
1357static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
1358{
1359        int idx;
1360        int ret;
1361
1362        for (idx = 0; idx < qp_list->idx; idx++) {
1363                struct c4iw_qp *qp = qp_list->qps[idx];
1364
1365                spin_lock_irq(&qp->rhp->lock);
1366                spin_lock(&qp->lock);
1367                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1368                                          qp->wq.sq.qid,
1369                                          t4_sq_host_wq_pidx(&qp->wq),
1370                                          t4_sq_wq_size(&qp->wq));
1371                if (ret) {
1372                        pr_err(MOD "%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
1373                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
1376                        spin_unlock(&qp->lock);
1377                        spin_unlock_irq(&qp->rhp->lock);
1378                        return;
1379                }
1380                qp->wq.sq.wq_pidx_inc = 0;
1381
1382                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1383                                          qp->wq.rq.qid,
1384                                          t4_rq_host_wq_pidx(&qp->wq),
1385                                          t4_rq_wq_size(&qp->wq));
1386
1387                if (ret) {
1388                        pr_err(MOD "%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
1389                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
1392                        spin_unlock(&qp->lock);
1393                        spin_unlock_irq(&qp->rhp->lock);
1394                        return;
1395                }
1396                qp->wq.rq.wq_pidx_inc = 0;
1397                spin_unlock(&qp->lock);
1398                spin_unlock_irq(&qp->rhp->lock);
1399
1400                /* Wait for the dbfifo to drain */
1401                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
1402                        set_current_state(TASK_UNINTERRUPTIBLE);
1403                        schedule_timeout(usecs_to_jiffies(10));
1404                }
1405        }
1406}
1407
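    /*
     * Full doorbell-drop recovery, triggered by CXGB4_CONTROL_DB_DROP:
     * flush the SGE EQ context cache, enter the RECOVERY state, take a
     * reference on every QP so the set can be walked without the device
     * lock held, replay the lost doorbells via recover_lost_dbs(), then
     * drop the references and return to STOPPED until a later DB_EMPTY
     * event lets resume_queues() restart everything.
     */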
1408static void recover_queues(struct uld_ctx *ctx)
1409{
1410        int count = 0;
1411        struct qp_list qp_list;
1412        int ret;
1413
1414        /* slow everybody down */
1415        set_current_state(TASK_UNINTERRUPTIBLE);
1416        schedule_timeout(usecs_to_jiffies(1000));
1417
1418        /* flush the SGE contexts */
1419        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
1420        if (ret) {
1421                pr_err(MOD "%s: Fatal error - DB overflow recovery failed - could not flush the SGE EQ cache\n",
1422                       pci_name(ctx->lldi.pdev));
1423                return;
1424        }
1425
1426        /* Count active queues so we can build a list of queues to recover */
1427        spin_lock_irq(&ctx->dev->lock);
1428        WARN_ON(ctx->dev->db_state != STOPPED);
1429        ctx->dev->db_state = RECOVERY;
1430        idr_for_each(&ctx->dev->qpidr, count_qps, &count);
1431
1432        qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC);
1433        if (!qp_list.qps) {
1434                pr_err(MOD "%s: Fatal error - DB overflow recovery failed - could not allocate the QP list\n",
1435                       pci_name(ctx->lldi.pdev));
1436                spin_unlock_irq(&ctx->dev->lock);
1437                return;
1438        }
1439        qp_list.idx = 0;
1440
1441        /* add and ref each qp so it doesn't get freed */
1442        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
1443
1444        spin_unlock_irq(&ctx->dev->lock);
1445
1446        /* now traverse the list in a safe context to recover the db state */
1447        recover_lost_dbs(ctx, &qp_list);
1448
1449        /* we're almost done!  deref the qps and clean up */
1450        deref_qps(&qp_list);
1451        kfree(qp_list.qps);
1452
1453        spin_lock_irq(&ctx->dev->lock);
1454        WARN_ON(ctx->dev->db_state != RECOVERY);
1455        ctx->dev->db_state = STOPPED;
1456        spin_unlock_irq(&ctx->dev->lock);
1457}
1458
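    /*
     * Doorbell flow-control notifications from the cxgb4 LLD: stop ringing
     * doorbells when the FIFO fills, resume when it empties, and run full
     * recovery when the hardware reports dropped doorbells.  Each event
     * also bumps the corresponding statistics counter.
     */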
1459static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
1460{
1461        struct uld_ctx *ctx = handle;
1462
1463        switch (control) {
1464        case CXGB4_CONTROL_DB_FULL:
1465                stop_queues(ctx);
1466                ctx->dev->rdev.stats.db_full++;
1467                break;
1468        case CXGB4_CONTROL_DB_EMPTY:
1469                resume_queues(ctx);
1470                mutex_lock(&ctx->dev->rdev.stats.lock);
1471                ctx->dev->rdev.stats.db_empty++;
1472                mutex_unlock(&ctx->dev->rdev.stats.lock);
1473                break;
1474        case CXGB4_CONTROL_DB_DROP:
1475                recover_queues(ctx);
1476                mutex_lock(&ctx->dev->rdev.stats.lock);
1477                ctx->dev->rdev.stats.db_drop++;
1478                mutex_unlock(&ctx->dev->rdev.stats.lock);
1479                break;
1480        default:
1481                printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
1482                       pci_name(ctx->lldi.pdev), control);
1483                break;
1484        }
1485        return 0;
1486}
1487
1488static struct cxgb4_uld_info c4iw_uld_info = {
1489        .name = DRV_NAME,
1490        .add = c4iw_uld_add,
1491        .rx_handler = c4iw_uld_rx_handler,
1492        .state_change = c4iw_uld_state_change,
1493        .control = c4iw_uld_control,
1494};
1495
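    /*
     * Module init: bring up the CM layer, create the debugfs root, register
     * the iWARP port-mapper netlink client, and finally register with cxgb4
     * as an RDMA upper-layer driver, which causes c4iw_uld_add() to be
     * called for each T4/T5 adapter that is already probed.
     */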
1496static int __init c4iw_init_module(void)
1497{
1498        int err;
1499
1500        err = c4iw_cm_init();
1501        if (err)
1502                return err;
1503
1504        c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
1505        if (!c4iw_debugfs_root)
1506                printk(KERN_WARNING MOD
1507                       "could not create debugfs entry, continuing\n");
1508
1509        if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
1510                            c4iw_nl_cb_table))
1511                pr_err("%s[%u]: Failed to add netlink callback\n",
1512                       __func__, __LINE__);
1513
1514        err = iwpm_init(RDMA_NL_C4IW);
1515        if (err) {
1516                pr_err("port mapper initialization failed with %d\n", err);
1517                ibnl_remove_client(RDMA_NL_C4IW);
1518                c4iw_cm_term();
1519                debugfs_remove_recursive(c4iw_debugfs_root);
1520                return err;
1521        }
1522
1523        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
1524
1525        return 0;
1526}
1527
1528static void __exit c4iw_exit_module(void)
1529{
1530        struct uld_ctx *ctx, *tmp;
1531
1532        mutex_lock(&dev_mutex);
1533        list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
1534                if (ctx->dev)
1535                        c4iw_remove(ctx);
1536                kfree(ctx);
1537        }
1538        mutex_unlock(&dev_mutex);
1539        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
1540        iwpm_exit(RDMA_NL_C4IW);
1541        ibnl_remove_client(RDMA_NL_C4IW);
1542        c4iw_cm_term();
1543        debugfs_remove_recursive(c4iw_debugfs_root);
1544}
1545
1546module_init(c4iw_init_module);
1547module_exit(c4iw_exit_module);
1548