linux/drivers/infiniband/hw/cxgb4/device.c
   1/*
   2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/moduleparam.h>
  34#include <linux/debugfs.h>
  35#include <linux/vmalloc.h>
  36#include <linux/math64.h>
  37
  38#include <rdma/ib_verbs.h>
  39
  40#include "iw_cxgb4.h"
  41
  42#define DRV_VERSION "0.1"
  43
  44MODULE_AUTHOR("Steve Wise");
  45MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
  46MODULE_LICENSE("Dual BSD/GPL");
  47MODULE_VERSION(DRV_VERSION);
  48
  49static int allow_db_fc_on_t5;
  50module_param(allow_db_fc_on_t5, int, 0644);
  51MODULE_PARM_DESC(allow_db_fc_on_t5,
  52                 "Allow DB Flow Control on T5 (default = 0)");
  53
  54static int allow_db_coalescing_on_t5;
  55module_param(allow_db_coalescing_on_t5, int, 0644);
  56MODULE_PARM_DESC(allow_db_coalescing_on_t5,
  57                 "Allow DB Coalescing on T5 (default = 0)");
  58
   59int c4iw_wr_log;
  60module_param(c4iw_wr_log, int, 0444);
  61MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
  62
  63static int c4iw_wr_log_size_order = 12;
  64module_param(c4iw_wr_log_size_order, int, 0444);
  65MODULE_PARM_DESC(c4iw_wr_log_size_order,
  66                 "Number of entries (log2) in the work request timing log.");
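/*
 * Illustrative module-parameter usage (hypothetical invocation, not taken
 * from this file): "modprobe iw_cxgb4 c4iw_wr_log=1 c4iw_wr_log_size_order=12"
 * would enable work-request timing capture with a 4096-entry circular log.
 * Both parameters are read-only (0444) at runtime, so they must be set at
 * module load time.
 */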
  67
  68struct uld_ctx {
  69        struct list_head entry;
  70        struct cxgb4_lld_info lldi;
  71        struct c4iw_dev *dev;
  72};
  73
  74static LIST_HEAD(uld_ctx_list);
  75static DEFINE_MUTEX(dev_mutex);
  76
  77#define DB_FC_RESUME_SIZE 64
  78#define DB_FC_RESUME_DELAY 1
  79#define DB_FC_DRAIN_THRESH 0
  80
  81static struct dentry *c4iw_debugfs_root;
  82
  83struct c4iw_debugfs_data {
  84        struct c4iw_dev *devp;
  85        char *buf;
  86        int bufsize;
  87        int pos;
  88};
  89
  90/* registered cxgb4 netlink callbacks */
  91static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
  92        [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
  93        [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
  94        [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
  95        [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
  96        [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
  97        [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
  98        [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
  99};
 100
 101static int count_idrs(int id, void *p, void *data)
 102{
 103        int *countp = data;
 104
 105        *countp = *countp + 1;
 106        return 0;
 107}
 108
 109static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
 110                            loff_t *ppos)
 111{
 112        struct c4iw_debugfs_data *d = file->private_data;
 113
 114        return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
 115}
 116
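/*
 * Capture one work-request timing sample into the circular wr_log.  A slot
 * is claimed with an atomic increment so concurrent CQ polling contexts can
 * log safely; SGE (hardware) and host timestamps taken at post time and at
 * poll time are stored so wr_log_show() can report the deltas.
 */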
 117void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
 118{
 119        struct wr_log_entry le;
 120        int idx;
 121
 122        if (!wq->rdev->wr_log)
 123                return;
 124
 125        idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
 126                (wq->rdev->wr_log_size - 1);
 127        le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
 128        getnstimeofday(&le.poll_host_ts);
 129        le.valid = 1;
 130        le.cqe_sge_ts = CQE_TS(cqe);
 131        if (SQ_TYPE(cqe)) {
 132                le.qid = wq->sq.qid;
 133                le.opcode = CQE_OPCODE(cqe);
 134                le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
 135                le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
 136                le.wr_id = CQE_WRID_SQ_IDX(cqe);
 137        } else {
 138                le.qid = wq->rq.qid;
 139                le.opcode = FW_RI_RECEIVE;
 140                le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
 141                le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
 142                le.wr_id = CQE_WRID_MSN(cqe);
 143        }
 144        wq->rdev->wr_log[idx] = le;
 145}
 146
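/*
 * debugfs "wr_log" show routine: walk the circular log from the oldest
 * entry to the newest, converting raw SGE timestamps to nanoseconds from
 * the core clock period (cclk_ps, in picoseconds) via the ts2ns() helper
 * defined below.
 */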
 147static int wr_log_show(struct seq_file *seq, void *v)
 148{
 149        struct c4iw_dev *dev = seq->private;
 150        struct timespec prev_ts = {0, 0};
 151        struct wr_log_entry *lep;
 152        int prev_ts_set = 0;
 153        int idx, end;
 154
 155#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
 156
 157        idx = atomic_read(&dev->rdev.wr_log_idx) &
 158                (dev->rdev.wr_log_size - 1);
 159        end = idx - 1;
 160        if (end < 0)
 161                end = dev->rdev.wr_log_size - 1;
 162        lep = &dev->rdev.wr_log[idx];
 163        while (idx != end) {
 164                if (lep->valid) {
 165                        if (!prev_ts_set) {
 166                                prev_ts_set = 1;
 167                                prev_ts = lep->poll_host_ts;
 168                        }
 169                        seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
 170                                   "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
 171                                   "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
 172                                   "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
 173                                   "cqe_poll_delta_ns %llu\n",
 174                                   idx,
 175                                   timespec_sub(lep->poll_host_ts,
 176                                                prev_ts).tv_sec,
 177                                   timespec_sub(lep->poll_host_ts,
 178                                                prev_ts).tv_nsec,
 179                                   lep->qid, lep->opcode,
 180                                   lep->opcode == FW_RI_RECEIVE ?
 181                                                        "msn" : "wrid",
 182                                   lep->wr_id,
 183                                   timespec_sub(lep->poll_host_ts,
 184                                                lep->post_host_ts).tv_sec,
 185                                   timespec_sub(lep->poll_host_ts,
 186                                                lep->post_host_ts).tv_nsec,
 187                                   lep->post_sge_ts, lep->cqe_sge_ts,
 188                                   lep->poll_sge_ts,
 189                                   ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
 190                                   ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
 191                        prev_ts = lep->poll_host_ts;
 192                }
 193                idx++;
 194                if (idx > (dev->rdev.wr_log_size - 1))
 195                        idx = 0;
 196                lep = &dev->rdev.wr_log[idx];
 197        }
 198#undef ts2ns
 199        return 0;
 200}
 201
 202static int wr_log_open(struct inode *inode, struct file *file)
 203{
 204        return single_open(file, wr_log_show, inode->i_private);
 205}
 206
 207static ssize_t wr_log_clear(struct file *file, const char __user *buf,
 208                            size_t count, loff_t *pos)
 209{
 210        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 211        int i;
 212
 213        if (dev->rdev.wr_log)
 214                for (i = 0; i < dev->rdev.wr_log_size; i++)
 215                        dev->rdev.wr_log[i].valid = 0;
 216        return count;
 217}
 218
 219static const struct file_operations wr_log_debugfs_fops = {
 220        .owner   = THIS_MODULE,
 221        .open    = wr_log_open,
 222        .release = single_release,
 223        .read    = seq_read,
 224        .llseek  = seq_lseek,
 225        .write   = wr_log_clear,
 226};
 227
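/*
 * idr_for_each() callback for the "qps" debugfs file: format one line per
 * RC QP (keyed by its SQ qid) into the preallocated buffer.  Returning 1
 * stops the walk once the buffer is full.
 */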
 228static int dump_qp(int id, void *p, void *data)
 229{
 230        struct c4iw_qp *qp = p;
 231        struct c4iw_debugfs_data *qpd = data;
 232        int space;
 233        int cc;
 234
 235        if (id != qp->wq.sq.qid)
 236                return 0;
 237
 238        space = qpd->bufsize - qpd->pos - 1;
 239        if (space == 0)
 240                return 1;
 241
 242        if (qp->ep) {
 243                if (qp->ep->com.local_addr.ss_family == AF_INET) {
 244                        struct sockaddr_in *lsin = (struct sockaddr_in *)
 245                                &qp->ep->com.local_addr;
 246                        struct sockaddr_in *rsin = (struct sockaddr_in *)
 247                                &qp->ep->com.remote_addr;
 248                        struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 249                                &qp->ep->com.mapped_local_addr;
 250                        struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
 251                                &qp->ep->com.mapped_remote_addr;
 252
 253                        cc = snprintf(qpd->buf + qpd->pos, space,
 254                                      "rc qp sq id %u rq id %u state %u "
 255                                      "onchip %u ep tid %u state %u "
 256                                      "%pI4:%u/%u->%pI4:%u/%u\n",
 257                                      qp->wq.sq.qid, qp->wq.rq.qid,
 258                                      (int)qp->attr.state,
 259                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 260                                      qp->ep->hwtid, (int)qp->ep->com.state,
 261                                      &lsin->sin_addr, ntohs(lsin->sin_port),
 262                                      ntohs(mapped_lsin->sin_port),
 263                                      &rsin->sin_addr, ntohs(rsin->sin_port),
 264                                      ntohs(mapped_rsin->sin_port));
 265                } else {
 266                        struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 267                                &qp->ep->com.local_addr;
 268                        struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 269                                &qp->ep->com.remote_addr;
 270                        struct sockaddr_in6 *mapped_lsin6 =
 271                                (struct sockaddr_in6 *)
 272                                &qp->ep->com.mapped_local_addr;
 273                        struct sockaddr_in6 *mapped_rsin6 =
 274                                (struct sockaddr_in6 *)
 275                                &qp->ep->com.mapped_remote_addr;
 276
 277                        cc = snprintf(qpd->buf + qpd->pos, space,
 278                                      "rc qp sq id %u rq id %u state %u "
 279                                      "onchip %u ep tid %u state %u "
 280                                      "%pI6:%u/%u->%pI6:%u/%u\n",
 281                                      qp->wq.sq.qid, qp->wq.rq.qid,
 282                                      (int)qp->attr.state,
 283                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
 284                                      qp->ep->hwtid, (int)qp->ep->com.state,
 285                                      &lsin6->sin6_addr,
 286                                      ntohs(lsin6->sin6_port),
 287                                      ntohs(mapped_lsin6->sin6_port),
 288                                      &rsin6->sin6_addr,
 289                                      ntohs(rsin6->sin6_port),
 290                                      ntohs(mapped_rsin6->sin6_port));
 291                }
  292        } else {
  293                cc = snprintf(qpd->buf + qpd->pos, space,
  294                              "qp sq id %u rq id %u state %u onchip %u\n",
  295                              qp->wq.sq.qid, qp->wq.rq.qid,
  296                              (int)qp->attr.state,
  297                              qp->wq.sq.flags & T4_SQ_ONCHIP);
             }
 298        if (cc < space)
 299                qpd->pos += cc;
 300        return 0;
 301}
 302
 303static int qp_release(struct inode *inode, struct file *file)
 304{
 305        struct c4iw_debugfs_data *qpd = file->private_data;
 306        if (!qpd) {
 307                printk(KERN_INFO "%s null qpd?\n", __func__);
 308                return 0;
 309        }
 310        vfree(qpd->buf);
 311        kfree(qpd);
 312        return 0;
 313}
 314
 315static int qp_open(struct inode *inode, struct file *file)
 316{
 317        struct c4iw_debugfs_data *qpd;
 318        int ret = 0;
 319        int count = 1;
 320
 321        qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
 322        if (!qpd) {
 323                ret = -ENOMEM;
 324                goto out;
 325        }
 326        qpd->devp = inode->i_private;
 327        qpd->pos = 0;
 328
 329        spin_lock_irq(&qpd->devp->lock);
 330        idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
 331        spin_unlock_irq(&qpd->devp->lock);
 332
 333        qpd->bufsize = count * 128;
 334        qpd->buf = vmalloc(qpd->bufsize);
 335        if (!qpd->buf) {
 336                ret = -ENOMEM;
 337                goto err1;
 338        }
 339
 340        spin_lock_irq(&qpd->devp->lock);
 341        idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
 342        spin_unlock_irq(&qpd->devp->lock);
 343
 344        qpd->buf[qpd->pos++] = 0;
 345        file->private_data = qpd;
 346        goto out;
 347err1:
 348        kfree(qpd);
 349out:
 350        return ret;
 351}
 352
 353static const struct file_operations qp_debugfs_fops = {
 354        .owner   = THIS_MODULE,
 355        .open    = qp_open,
 356        .release = qp_release,
 357        .read    = debugfs_read,
 358        .llseek  = default_llseek,
 359};
 360
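/*
 * idr_for_each() callback for the "stags" debugfs file: read the TPT entry
 * for each registered STAG via cxgb4_read_tpte() and decode its fields into
 * one line of the preallocated buffer.
 */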
 361static int dump_stag(int id, void *p, void *data)
 362{
 363        struct c4iw_debugfs_data *stagd = data;
 364        int space;
 365        int cc;
 366        struct fw_ri_tpte tpte;
 367        int ret;
 368
 369        space = stagd->bufsize - stagd->pos - 1;
 370        if (space == 0)
 371                return 1;
 372
 373        ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8,
 374                              (__be32 *)&tpte);
 375        if (ret) {
 376                dev_err(&stagd->devp->rdev.lldi.pdev->dev,
 377                        "%s cxgb4_read_tpte err %d\n", __func__, ret);
 378                return ret;
 379        }
 380        cc = snprintf(stagd->buf + stagd->pos, space,
 381                      "stag: idx 0x%x valid %d key 0x%x state %d pdid %d "
 382                      "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
 383                      (u32)id<<8,
 384                      FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
 385                      FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
 386                      FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
 387                      FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
 388                      FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
 389                      FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
 390                      ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
 391                      ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
 392        if (cc < space)
 393                stagd->pos += cc;
 394        return 0;
 395}
 396
 397static int stag_release(struct inode *inode, struct file *file)
 398{
 399        struct c4iw_debugfs_data *stagd = file->private_data;
 400        if (!stagd) {
 401                printk(KERN_INFO "%s null stagd?\n", __func__);
 402                return 0;
 403        }
 404        vfree(stagd->buf);
 405        kfree(stagd);
 406        return 0;
 407}
 408
 409static int stag_open(struct inode *inode, struct file *file)
 410{
 411        struct c4iw_debugfs_data *stagd;
 412        int ret = 0;
 413        int count = 1;
 414
 415        stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
 416        if (!stagd) {
 417                ret = -ENOMEM;
 418                goto out;
 419        }
 420        stagd->devp = inode->i_private;
 421        stagd->pos = 0;
 422
 423        spin_lock_irq(&stagd->devp->lock);
 424        idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
 425        spin_unlock_irq(&stagd->devp->lock);
 426
 427        stagd->bufsize = count * 256;
 428        stagd->buf = vmalloc(stagd->bufsize);
 429        if (!stagd->buf) {
 430                ret = -ENOMEM;
 431                goto err1;
 432        }
 433
 434        spin_lock_irq(&stagd->devp->lock);
 435        idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
 436        spin_unlock_irq(&stagd->devp->lock);
 437
 438        stagd->buf[stagd->pos++] = 0;
 439        file->private_data = stagd;
 440        goto out;
 441err1:
 442        kfree(stagd);
 443out:
 444        return ret;
 445}
 446
 447static const struct file_operations stag_debugfs_fops = {
 448        .owner   = THIS_MODULE,
 449        .open    = stag_open,
 450        .release = stag_release,
 451        .read    = debugfs_read,
 452        .llseek  = default_llseek,
 453};
 454
 455static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
 456
 457static int stats_show(struct seq_file *seq, void *v)
 458{
 459        struct c4iw_dev *dev = seq->private;
 460
 461        seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
 462                   "Max", "Fail");
 463        seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
 464                        dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
 465                        dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
 466        seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 467                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 468                        dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
 469        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 470                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 471                        dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
 472        seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
 473                        dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
 474                        dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
 475        seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
 476                        dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
 477                        dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
 478        seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
 479                        dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
 480                        dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
 481        seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
 482        seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
 483        seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
 484        seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
 485                   db_state_str[dev->db_state],
 486                   dev->rdev.stats.db_state_transitions,
 487                   dev->rdev.stats.db_fc_interruptions);
 488        seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
 489        seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
 490                   dev->rdev.stats.act_ofld_conn_fails);
 491        seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
 492                   dev->rdev.stats.pas_ofld_conn_fails);
 493        seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
 494        seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
 495        return 0;
 496}
 497
 498static int stats_open(struct inode *inode, struct file *file)
 499{
 500        return single_open(file, stats_show, inode->i_private);
 501}
 502
 503static ssize_t stats_clear(struct file *file, const char __user *buf,
 504                size_t count, loff_t *pos)
 505{
 506        struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
 507
 508        mutex_lock(&dev->rdev.stats.lock);
 509        dev->rdev.stats.pd.max = 0;
 510        dev->rdev.stats.pd.fail = 0;
 511        dev->rdev.stats.qid.max = 0;
 512        dev->rdev.stats.qid.fail = 0;
 513        dev->rdev.stats.stag.max = 0;
 514        dev->rdev.stats.stag.fail = 0;
 515        dev->rdev.stats.pbl.max = 0;
 516        dev->rdev.stats.pbl.fail = 0;
 517        dev->rdev.stats.rqt.max = 0;
 518        dev->rdev.stats.rqt.fail = 0;
 519        dev->rdev.stats.ocqp.max = 0;
 520        dev->rdev.stats.ocqp.fail = 0;
 521        dev->rdev.stats.db_full = 0;
 522        dev->rdev.stats.db_empty = 0;
 523        dev->rdev.stats.db_drop = 0;
 524        dev->rdev.stats.db_state_transitions = 0;
 525        dev->rdev.stats.tcam_full = 0;
 526        dev->rdev.stats.act_ofld_conn_fails = 0;
 527        dev->rdev.stats.pas_ofld_conn_fails = 0;
 528        mutex_unlock(&dev->rdev.stats.lock);
 529        return count;
 530}
 531
 532static const struct file_operations stats_debugfs_fops = {
 533        .owner   = THIS_MODULE,
 534        .open    = stats_open,
 535        .release = single_release,
 536        .read    = seq_read,
 537        .llseek  = seq_lseek,
 538        .write   = stats_clear,
 539};
 540
 541static int dump_ep(int id, void *p, void *data)
 542{
 543        struct c4iw_ep *ep = p;
 544        struct c4iw_debugfs_data *epd = data;
 545        int space;
 546        int cc;
 547
 548        space = epd->bufsize - epd->pos - 1;
 549        if (space == 0)
 550                return 1;
 551
 552        if (ep->com.local_addr.ss_family == AF_INET) {
 553                struct sockaddr_in *lsin = (struct sockaddr_in *)
 554                        &ep->com.local_addr;
 555                struct sockaddr_in *rsin = (struct sockaddr_in *)
 556                        &ep->com.remote_addr;
 557                struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 558                        &ep->com.mapped_local_addr;
 559                struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
 560                        &ep->com.mapped_remote_addr;
 561
 562                cc = snprintf(epd->buf + epd->pos, space,
 563                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 564                              "history 0x%lx hwtid %d atid %d "
 565                              "conn_na %u abort_na %u "
 566                              "%pI4:%d/%d <-> %pI4:%d/%d\n",
 567                              ep, ep->com.cm_id, ep->com.qp,
 568                              (int)ep->com.state, ep->com.flags,
 569                              ep->com.history, ep->hwtid, ep->atid,
 570                              ep->stats.connect_neg_adv,
 571                              ep->stats.abort_neg_adv,
 572                              &lsin->sin_addr, ntohs(lsin->sin_port),
 573                              ntohs(mapped_lsin->sin_port),
 574                              &rsin->sin_addr, ntohs(rsin->sin_port),
 575                              ntohs(mapped_rsin->sin_port));
 576        } else {
 577                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 578                        &ep->com.local_addr;
 579                struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
 580                        &ep->com.remote_addr;
 581                struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
 582                        &ep->com.mapped_local_addr;
 583                struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
 584                        &ep->com.mapped_remote_addr;
 585
 586                cc = snprintf(epd->buf + epd->pos, space,
 587                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
 588                              "history 0x%lx hwtid %d atid %d "
 589                              "conn_na %u abort_na %u "
 590                              "%pI6:%d/%d <-> %pI6:%d/%d\n",
 591                              ep, ep->com.cm_id, ep->com.qp,
 592                              (int)ep->com.state, ep->com.flags,
 593                              ep->com.history, ep->hwtid, ep->atid,
 594                              ep->stats.connect_neg_adv,
 595                              ep->stats.abort_neg_adv,
 596                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 597                              ntohs(mapped_lsin6->sin6_port),
 598                              &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
 599                              ntohs(mapped_rsin6->sin6_port));
 600        }
 601        if (cc < space)
 602                epd->pos += cc;
 603        return 0;
 604}
 605
 606static int dump_listen_ep(int id, void *p, void *data)
 607{
 608        struct c4iw_listen_ep *ep = p;
 609        struct c4iw_debugfs_data *epd = data;
 610        int space;
 611        int cc;
 612
 613        space = epd->bufsize - epd->pos - 1;
 614        if (space == 0)
 615                return 1;
 616
 617        if (ep->com.local_addr.ss_family == AF_INET) {
 618                struct sockaddr_in *lsin = (struct sockaddr_in *)
 619                        &ep->com.local_addr;
 620                struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
 621                        &ep->com.mapped_local_addr;
 622
 623                cc = snprintf(epd->buf + epd->pos, space,
 624                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 625                              "backlog %d %pI4:%d/%d\n",
 626                              ep, ep->com.cm_id, (int)ep->com.state,
 627                              ep->com.flags, ep->stid, ep->backlog,
 628                              &lsin->sin_addr, ntohs(lsin->sin_port),
 629                              ntohs(mapped_lsin->sin_port));
 630        } else {
 631                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
 632                        &ep->com.local_addr;
 633                struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
 634                        &ep->com.mapped_local_addr;
 635
 636                cc = snprintf(epd->buf + epd->pos, space,
 637                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
 638                              "backlog %d %pI6:%d/%d\n",
 639                              ep, ep->com.cm_id, (int)ep->com.state,
 640                              ep->com.flags, ep->stid, ep->backlog,
 641                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 642                              ntohs(mapped_lsin6->sin6_port));
 643        }
 644        if (cc < space)
 645                epd->pos += cc;
 646        return 0;
 647}
 648
 649static int ep_release(struct inode *inode, struct file *file)
 650{
 651        struct c4iw_debugfs_data *epd = file->private_data;
 652        if (!epd) {
  653                pr_info("%s null epd?\n", __func__);
 654                return 0;
 655        }
 656        vfree(epd->buf);
 657        kfree(epd);
 658        return 0;
 659}
 660
 661static int ep_open(struct inode *inode, struct file *file)
 662{
 663        struct c4iw_debugfs_data *epd;
 664        int ret = 0;
 665        int count = 1;
 666
 667        epd = kmalloc(sizeof(*epd), GFP_KERNEL);
 668        if (!epd) {
 669                ret = -ENOMEM;
 670                goto out;
 671        }
 672        epd->devp = inode->i_private;
 673        epd->pos = 0;
 674
 675        spin_lock_irq(&epd->devp->lock);
 676        idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
 677        idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
 678        idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 679        spin_unlock_irq(&epd->devp->lock);
 680
 681        epd->bufsize = count * 240;
 682        epd->buf = vmalloc(epd->bufsize);
 683        if (!epd->buf) {
 684                ret = -ENOMEM;
 685                goto err1;
 686        }
 687
 688        spin_lock_irq(&epd->devp->lock);
 689        idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
 690        idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
 691        idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
 692        spin_unlock_irq(&epd->devp->lock);
 693
 694        file->private_data = epd;
 695        goto out;
 696err1:
 697        kfree(epd);
 698out:
 699        return ret;
 700}
 701
 702static const struct file_operations ep_debugfs_fops = {
 703        .owner   = THIS_MODULE,
 704        .open    = ep_open,
 705        .release = ep_release,
 706        .read    = debugfs_read,
 707};
 708
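/*
 * Per-device debugfs files, created under a per-PCI-function directory in
 * the driver's debugfs root: "qps", "stags", "stats", "eps" and, when
 * c4iw_wr_log is set, "wr_log".  Writing to "stats" or "wr_log" clears the
 * counters or the timing log, respectively.
 *
 * Illustrative usage (the root directory name is created elsewhere in the
 * driver and is assumed here; debugfs is typically mounted at
 * /sys/kernel/debug):
 *   cat /sys/kernel/debug/iw_cxgb4/<pci-id>/stats
 *   echo 1 > /sys/kernel/debug/iw_cxgb4/<pci-id>/stats   (clears counters)
 */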
 709static int setup_debugfs(struct c4iw_dev *devp)
 710{
 711        if (!devp->debugfs_root)
 712                return -1;
 713
 714        debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
 715                                 (void *)devp, &qp_debugfs_fops, 4096);
 716
 717        debugfs_create_file_size("stags", S_IWUSR, devp->debugfs_root,
 718                                 (void *)devp, &stag_debugfs_fops, 4096);
 719
 720        debugfs_create_file_size("stats", S_IWUSR, devp->debugfs_root,
 721                                 (void *)devp, &stats_debugfs_fops, 4096);
 722
 723        debugfs_create_file_size("eps", S_IWUSR, devp->debugfs_root,
 724                                 (void *)devp, &ep_debugfs_fops, 4096);
 725
 726        if (c4iw_wr_log)
 727                debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
 728                                         (void *)devp, &wr_log_debugfs_fops, 4096);
 729        return 0;
 730}
 731
 732void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 733                               struct c4iw_dev_ucontext *uctx)
 734{
 735        struct list_head *pos, *nxt;
 736        struct c4iw_qid_list *entry;
 737
 738        mutex_lock(&uctx->lock);
 739        list_for_each_safe(pos, nxt, &uctx->qpids) {
 740                entry = list_entry(pos, struct c4iw_qid_list, entry);
 741                list_del_init(&entry->entry);
 742                if (!(entry->qid & rdev->qpmask)) {
 743                        c4iw_put_resource(&rdev->resource.qid_table,
 744                                          entry->qid);
 745                        mutex_lock(&rdev->stats.lock);
 746                        rdev->stats.qid.cur -= rdev->qpmask + 1;
 747                        mutex_unlock(&rdev->stats.lock);
 748                }
 749                kfree(entry);
 750        }
 751
 752        list_for_each_safe(pos, nxt, &uctx->qpids) {
 753                entry = list_entry(pos, struct c4iw_qid_list, entry);
 754                list_del_init(&entry->entry);
 755                kfree(entry);
 756        }
 757        mutex_unlock(&uctx->lock);
 758}
 759
 760void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 761                            struct c4iw_dev_ucontext *uctx)
 762{
 763        INIT_LIST_HEAD(&uctx->qpids);
 764        INIT_LIST_HEAD(&uctx->cqids);
 765        mutex_init(&uctx->lock);
 766}
 767
 768/* Caller takes care of locking if needed */
 769static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 770{
 771        int err;
 772
 773        c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 774
 775        /*
 776         * This implementation assumes udb_density == ucq_density!  Eventually
 777         * we might need to support this but for now fail the open. Also the
 778         * cqid and qpid range must match for now.
 779         */
 780        if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
 781                pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
 782                       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
 783                       rdev->lldi.ucq_density);
 784                err = -EINVAL;
 785                goto err1;
 786        }
 787        if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
 788            rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
 789                pr_err(MOD "%s: unsupported qp and cq id ranges "
 790                       "qp start %u size %u cq start %u size %u\n",
 791                       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
  792                       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
 793                       rdev->lldi.vr->cq.size);
 794                err = -EINVAL;
 795                goto err1;
 796        }
 797
 798        /*
 799         * qpshift is the number of bits to shift the qpid left in order
 800         * to get the correct address of the doorbell for that qp.
 801         */
 802        rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
 803        rdev->qpmask = rdev->lldi.udb_density - 1;
 804        rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
 805        rdev->cqmask = rdev->lldi.ucq_density - 1;
 806        PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
 807             "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
 808             "qp qid start %u size %u cq qid start %u size %u\n",
 809             __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 810             rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 811             rdev->lldi.vr->pbl.start,
 812             rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
 813             rdev->lldi.vr->rq.size,
 814             rdev->lldi.vr->qp.start,
 815             rdev->lldi.vr->qp.size,
 816             rdev->lldi.vr->cq.start,
 817             rdev->lldi.vr->cq.size);
 818        PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
 819             "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
 820             (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
 821             (void *)pci_resource_start(rdev->lldi.pdev, 2),
 822             rdev->lldi.db_reg,
 823             rdev->lldi.gts_reg,
 824             rdev->qpshift, rdev->qpmask,
 825             rdev->cqshift, rdev->cqmask);
 826
 827        if (c4iw_num_stags(rdev) == 0) {
 828                err = -EINVAL;
 829                goto err1;
 830        }
 831
 832        rdev->stats.pd.total = T4_MAX_NUM_PD;
 833        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 834        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 835        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
 836        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 837        rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 838
 839        err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
 840        if (err) {
 841                printk(KERN_ERR MOD "error %d initializing resources\n", err);
 842                goto err1;
 843        }
 844        err = c4iw_pblpool_create(rdev);
 845        if (err) {
 846                printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
 847                goto err2;
 848        }
 849        err = c4iw_rqtpool_create(rdev);
 850        if (err) {
 851                printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
 852                goto err3;
 853        }
 854        err = c4iw_ocqp_pool_create(rdev);
 855        if (err) {
 856                printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
 857                goto err4;
 858        }
 859        rdev->status_page = (struct t4_dev_status_page *)
 860                            __get_free_page(GFP_KERNEL);
 861        if (!rdev->status_page) {
  862                pr_err(MOD "error allocating status page\n");
                     err = -ENOMEM;
  863                goto err4;
 864        }
 865
 866        if (c4iw_wr_log) {
 867                rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
 868                                       sizeof(*rdev->wr_log), GFP_KERNEL);
 869                if (rdev->wr_log) {
 870                        rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
 871                        atomic_set(&rdev->wr_log_idx, 0);
 872                } else {
 873                        pr_err(MOD "error allocating wr_log. Logging disabled\n");
 874                }
 875        }
 876
 877        rdev->status_page->db_off = 0;
 878
 879        return 0;
 880err4:
 881        c4iw_rqtpool_destroy(rdev);
 882err3:
 883        c4iw_pblpool_destroy(rdev);
 884err2:
 885        c4iw_destroy_resource(&rdev->resource);
 886err1:
 887        return err;
 888}
 889
 890static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 891{
 892        kfree(rdev->wr_log);
 893        free_page((unsigned long)rdev->status_page);
 894        c4iw_pblpool_destroy(rdev);
 895        c4iw_rqtpool_destroy(rdev);
 896        c4iw_destroy_resource(&rdev->resource);
 897}
 898
 899static void c4iw_dealloc(struct uld_ctx *ctx)
 900{
 901        c4iw_rdev_close(&ctx->dev->rdev);
 902        idr_destroy(&ctx->dev->cqidr);
 903        idr_destroy(&ctx->dev->qpidr);
 904        idr_destroy(&ctx->dev->mmidr);
 905        idr_destroy(&ctx->dev->hwtid_idr);
 906        idr_destroy(&ctx->dev->stid_idr);
 907        idr_destroy(&ctx->dev->atid_idr);
 908        if (ctx->dev->rdev.bar2_kva)
 909                iounmap(ctx->dev->rdev.bar2_kva);
 910        if (ctx->dev->rdev.oc_mw_kva)
 911                iounmap(ctx->dev->rdev.oc_mw_kva);
 912        ib_dealloc_device(&ctx->dev->ibdev);
 913        ctx->dev = NULL;
 914}
 915
 916static void c4iw_remove(struct uld_ctx *ctx)
 917{
 918        PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
 919        c4iw_unregister_device(ctx->dev);
 920        c4iw_dealloc(ctx);
 921}
 922
 923static int rdma_supported(const struct cxgb4_lld_info *infop)
 924{
 925        return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
 926               infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
 927               infop->vr->cq.size > 0;
 928}
 929
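/*
 * Allocate and initialize the c4iw_dev for one adapter: derive the RDMA
 * hardware queue limits from the LLD info, map BAR2 (all of it with WC on
 * T5, only the on-chip queue window on T4 with OCQP support), open the
 * rdev resource pools and set up the per-device debugfs directory.
 */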
 930static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 931{
 932        struct c4iw_dev *devp;
 933        int ret;
 934
 935        if (!rdma_supported(infop)) {
 936                printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
 937                       pci_name(infop->pdev));
 938                return ERR_PTR(-ENOSYS);
 939        }
 940        if (!ocqp_supported(infop))
 941                pr_info("%s: On-Chip Queues not supported on this device.\n",
 942                        pci_name(infop->pdev));
 943
 944        devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 945        if (!devp) {
 946                printk(KERN_ERR MOD "Cannot allocate ib device\n");
 947                return ERR_PTR(-ENOMEM);
 948        }
 949        devp->rdev.lldi = *infop;
 950
 951        /* init various hw-queue params based on lld info */
 952        PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
 953             __func__, devp->rdev.lldi.sge_ingpadboundary,
 954             devp->rdev.lldi.sge_egrstatuspagesize);
 955
 956        devp->rdev.hw_queue.t4_eq_status_entries =
 957                devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
 958        devp->rdev.hw_queue.t4_max_eq_size = 65520;
 959        devp->rdev.hw_queue.t4_max_iq_size = 65520;
 960        devp->rdev.hw_queue.t4_max_rq_size = 8192 -
 961                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 962        devp->rdev.hw_queue.t4_max_sq_size =
 963                devp->rdev.hw_queue.t4_max_eq_size -
 964                devp->rdev.hw_queue.t4_eq_status_entries - 1;
 965        devp->rdev.hw_queue.t4_max_qp_depth =
 966                devp->rdev.hw_queue.t4_max_rq_size;
 967        devp->rdev.hw_queue.t4_max_cq_depth =
 968                devp->rdev.hw_queue.t4_max_iq_size - 2;
 969        devp->rdev.hw_queue.t4_stat_len =
 970                devp->rdev.lldi.sge_egrstatuspagesize;
 971
 972        /*
 973         * For T5 devices, we map all of BAR2 with WC.
 974         * For T4 devices with onchip qp mem, we map only that part
 975         * of BAR2 with WC.
 976         */
 977        devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
 978        if (is_t5(devp->rdev.lldi.adapter_type)) {
 979                devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
 980                        pci_resource_len(devp->rdev.lldi.pdev, 2));
 981                if (!devp->rdev.bar2_kva) {
 982                        pr_err(MOD "Unable to ioremap BAR2\n");
 983                        ib_dealloc_device(&devp->ibdev);
 984                        return ERR_PTR(-EINVAL);
 985                }
 986        } else if (ocqp_supported(infop)) {
 987                devp->rdev.oc_mw_pa =
 988                        pci_resource_start(devp->rdev.lldi.pdev, 2) +
 989                        pci_resource_len(devp->rdev.lldi.pdev, 2) -
 990                        roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
 991                devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
 992                        devp->rdev.lldi.vr->ocq.size);
 993                if (!devp->rdev.oc_mw_kva) {
 994                        pr_err(MOD "Unable to ioremap onchip mem\n");
 995                        ib_dealloc_device(&devp->ibdev);
 996                        return ERR_PTR(-EINVAL);
 997                }
 998        }
 999
 1000        PDBG("ocq memory: "
1001               "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
1002               devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
1003               devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
1004
1005        ret = c4iw_rdev_open(&devp->rdev);
1006        if (ret) {
 1007                printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
                     if (devp->rdev.bar2_kva)
                             iounmap(devp->rdev.bar2_kva);
                     if (devp->rdev.oc_mw_kva)
                             iounmap(devp->rdev.oc_mw_kva);
 1008                ib_dealloc_device(&devp->ibdev);
1009                return ERR_PTR(ret);
1010        }
1011
1012        idr_init(&devp->cqidr);
1013        idr_init(&devp->qpidr);
1014        idr_init(&devp->mmidr);
1015        idr_init(&devp->hwtid_idr);
1016        idr_init(&devp->stid_idr);
1017        idr_init(&devp->atid_idr);
1018        spin_lock_init(&devp->lock);
1019        mutex_init(&devp->rdev.stats.lock);
1020        mutex_init(&devp->db_mutex);
1021        INIT_LIST_HEAD(&devp->db_fc_list);
1022        devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
1023
1024        if (c4iw_debugfs_root) {
1025                devp->debugfs_root = debugfs_create_dir(
1026                                        pci_name(devp->rdev.lldi.pdev),
1027                                        c4iw_debugfs_root);
1028                setup_debugfs(devp);
1029        }
1030
1031
1032        return devp;
1033}
1034
1035static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
1036{
1037        struct uld_ctx *ctx;
1038        static int vers_printed;
1039        int i;
1040
1041        if (!vers_printed++)
1042                pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
1043                        DRV_VERSION);
1044
1045        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
1046        if (!ctx) {
1047                ctx = ERR_PTR(-ENOMEM);
1048                goto out;
1049        }
1050        ctx->lldi = *infop;
1051
1052        PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
1053             __func__, pci_name(ctx->lldi.pdev),
1054             ctx->lldi.nchan, ctx->lldi.nrxq,
1055             ctx->lldi.ntxq, ctx->lldi.nports);
1056
1057        mutex_lock(&dev_mutex);
1058        list_add_tail(&ctx->entry, &uld_ctx_list);
1059        mutex_unlock(&dev_mutex);
1060
1061        for (i = 0; i < ctx->lldi.nrxq; i++)
1062                PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
1063out:
1064        return ctx;
1065}
1066
1067static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
1068                                                 const __be64 *rsp,
1069                                                 u32 pktshift)
1070{
1071        struct sk_buff *skb;
1072
1073        /*
1074         * Allocate space for cpl_pass_accept_req which will be synthesized by
1075         * driver. Once the driver synthesizes the request the skb will go
1076         * through the regular cpl_pass_accept_req processing.
1077         * The math here assumes sizeof cpl_pass_accept_req >= sizeof
1078         * cpl_rx_pkt.
1079         */
1080        skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1081                        sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
1082        if (unlikely(!skb))
1083                return NULL;
1084
1085         __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
1086                   sizeof(struct rss_header) - pktshift);
1087
1088        /*
1089         * This skb will contain:
1090         *   rss_header from the rspq descriptor (1 flit)
1091         *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
1092         *   space for the difference between the size of an
1093         *      rx_pkt and pass_accept_req cpl (1 flit)
1094         *   the packet data from the gl
1095         */
1096        skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
1097                                sizeof(struct rss_header));
1098        skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
1099                                       sizeof(struct cpl_pass_accept_req),
1100                                       gl->va + pktshift,
1101                                       gl->tot_len - pktshift);
1102        return skb;
1103}
1104
1105static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
1106                           const __be64 *rsp)
1107{
1108        unsigned int opcode = *(u8 *)rsp;
1109        struct sk_buff *skb;
1110
1111        if (opcode != CPL_RX_PKT)
1112                goto out;
1113
 1114        skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
1115        if (skb == NULL)
1116                goto out;
1117
1118        if (c4iw_handlers[opcode] == NULL) {
1119                pr_info("%s no handler opcode 0x%x...\n", __func__,
1120                       opcode);
1121                kfree_skb(skb);
1122                goto out;
1123        }
1124        c4iw_handlers[opcode](dev, skb);
1125        return 1;
1126out:
1127        return 0;
1128}
1129
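/*
 * ULD ingress handler.  Three cases: a NULL gl means the CPL message is
 * embedded in the response descriptor itself; CXGB4_MSG_AN is an async
 * notification carrying a CQ id to service; otherwise the payload sits in
 * the free-list buffer and is copied into an skb before dispatch through
 * the per-opcode c4iw_handlers[] table.
 */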
1130static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
1131                        const struct pkt_gl *gl)
1132{
1133        struct uld_ctx *ctx = handle;
1134        struct c4iw_dev *dev = ctx->dev;
1135        struct sk_buff *skb;
1136        u8 opcode;
1137
1138        if (gl == NULL) {
1139                /* omit RSS and rsp_ctrl at end of descriptor */
1140                unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
1141
1142                skb = alloc_skb(256, GFP_ATOMIC);
1143                if (!skb)
1144                        goto nomem;
1145                __skb_put(skb, len);
1146                skb_copy_to_linear_data(skb, &rsp[1], len);
1147        } else if (gl == CXGB4_MSG_AN) {
1148                const struct rsp_ctrl *rc = (void *)rsp;
1149
1150                u32 qid = be32_to_cpu(rc->pldbuflen_qid);
1151                c4iw_ev_handler(dev, qid);
1152                return 0;
1153        } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
1154                if (recv_rx_pkt(dev, gl, rsp))
1155                        return 0;
1156
1157                pr_info("%s: unexpected FL contents at %p, " \
1158                       "RSS %#llx, FL %#llx, len %u\n",
1159                       pci_name(ctx->lldi.pdev), gl->va,
1160                       (unsigned long long)be64_to_cpu(*rsp),
1161                       (unsigned long long)be64_to_cpu(
1162                       *(__force __be64 *)gl->va),
1163                       gl->tot_len);
1164
1165                return 0;
1166        } else {
1167                skb = cxgb4_pktgl_to_skb(gl, 128, 128);
1168                if (unlikely(!skb))
1169                        goto nomem;
1170        }
1171
1172        opcode = *(u8 *)rsp;
1173        if (c4iw_handlers[opcode]) {
1174                c4iw_handlers[opcode](dev, skb);
1175        } else {
1176                pr_info("%s no handler opcode 0x%x...\n", __func__,
1177                       opcode);
1178                kfree_skb(skb);
1179        }
1180
1181        return 0;
1182nomem:
1183        return -1;
1184}
1185
1186static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
1187{
1188        struct uld_ctx *ctx = handle;
1189
1190        PDBG("%s new_state %u\n", __func__, new_state);
1191        switch (new_state) {
1192        case CXGB4_STATE_UP:
1193                printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
1194                if (!ctx->dev) {
1195                        int ret;
1196
1197                        ctx->dev = c4iw_alloc(&ctx->lldi);
1198                        if (IS_ERR(ctx->dev)) {
1199                                printk(KERN_ERR MOD
1200                                       "%s: initialization failed: %ld\n",
1201                                       pci_name(ctx->lldi.pdev),
1202                                       PTR_ERR(ctx->dev));
1203                                ctx->dev = NULL;
1204                                break;
1205                        }
1206                        ret = c4iw_register_device(ctx->dev);
1207                        if (ret) {
1208                                printk(KERN_ERR MOD
1209                                       "%s: RDMA registration failed: %d\n",
1210                                       pci_name(ctx->lldi.pdev), ret);
1211                                c4iw_dealloc(ctx);
1212                        }
1213                }
1214                break;
1215        case CXGB4_STATE_DOWN:
1216                printk(KERN_INFO MOD "%s: Down\n",
1217                       pci_name(ctx->lldi.pdev));
1218                if (ctx->dev)
1219                        c4iw_remove(ctx);
1220                break;
1221        case CXGB4_STATE_START_RECOVERY:
1222                printk(KERN_INFO MOD "%s: Fatal Error\n",
1223                       pci_name(ctx->lldi.pdev));
1224                if (ctx->dev) {
1225                        struct ib_event event;
1226
1227                        ctx->dev->rdev.flags |= T4_FATAL_ERROR;
1228                        memset(&event, 0, sizeof event);
1229                        event.event  = IB_EVENT_DEVICE_FATAL;
1230                        event.device = &ctx->dev->ibdev;
1231                        ib_dispatch_event(&event);
1232                        c4iw_remove(ctx);
1233                }
1234                break;
1235        case CXGB4_STATE_DETACH:
1236                printk(KERN_INFO MOD "%s: Detach\n",
1237                       pci_name(ctx->lldi.pdev));
1238                if (ctx->dev)
1239                        c4iw_remove(ctx);
1240                break;
1241        }
1242        return 0;
1243}
1244
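/*
 * Doorbell flow-control / recovery helpers (states in db_state_str above):
 * stop_queues() moves the device to STOPPED and turns user doorbells off,
 * either per QP or via the shared status page; resume_queues() drains the
 * db_fc_list in chunks under FLOW_CONTROL and returns to NORMAL; and
 * recover_queues() rebuilds every QP's producer indices in RECOVERY after
 * doorbells have been dropped.
 */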
1245static int disable_qp_db(int id, void *p, void *data)
1246{
1247        struct c4iw_qp *qp = p;
1248
1249        t4_disable_wq_db(&qp->wq);
1250        return 0;
1251}
1252
1253static void stop_queues(struct uld_ctx *ctx)
1254{
1255        unsigned long flags;
1256
1257        spin_lock_irqsave(&ctx->dev->lock, flags);
1258        ctx->dev->rdev.stats.db_state_transitions++;
1259        ctx->dev->db_state = STOPPED;
1260        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
1261                idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
1262        else
1263                ctx->dev->rdev.status_page->db_off = 1;
1264        spin_unlock_irqrestore(&ctx->dev->lock, flags);
1265}
1266
1267static int enable_qp_db(int id, void *p, void *data)
1268{
1269        struct c4iw_qp *qp = p;
1270
1271        t4_enable_wq_db(&qp->wq);
1272        return 0;
1273}
1274
1275static void resume_rc_qp(struct c4iw_qp *qp)
1276{
1277        spin_lock(&qp->lock);
1278        t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
1279                      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
1280        qp->wq.sq.wq_pidx_inc = 0;
1281        t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
1282                      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
1283        qp->wq.rq.wq_pidx_inc = 0;
1284        spin_unlock(&qp->lock);
1285}
1286
1287static void resume_a_chunk(struct uld_ctx *ctx)
1288{
1289        int i;
1290        struct c4iw_qp *qp;
1291
1292        for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
1293                qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
1294                                      db_fc_entry);
1295                list_del_init(&qp->db_fc_entry);
1296                resume_rc_qp(qp);
1297                if (list_empty(&ctx->dev->db_fc_list))
1298                        break;
1299        }
1300}
1301
1302static void resume_queues(struct uld_ctx *ctx)
1303{
1304        spin_lock_irq(&ctx->dev->lock);
1305        if (ctx->dev->db_state != STOPPED)
1306                goto out;
1307        ctx->dev->db_state = FLOW_CONTROL;
1308        while (1) {
1309                if (list_empty(&ctx->dev->db_fc_list)) {
1310                        WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
1311                        ctx->dev->db_state = NORMAL;
1312                        ctx->dev->rdev.stats.db_state_transitions++;
1313                        if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
1314                                idr_for_each(&ctx->dev->qpidr, enable_qp_db,
1315                                             NULL);
1316                        } else {
1317                                ctx->dev->rdev.status_page->db_off = 0;
1318                        }
1319                        break;
1320                } else {
1321                        if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
1322                            < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
1323                               DB_FC_DRAIN_THRESH)) {
1324                                resume_a_chunk(ctx);
1325                        }
1326                        if (!list_empty(&ctx->dev->db_fc_list)) {
1327                                spin_unlock_irq(&ctx->dev->lock);
1328                                if (DB_FC_RESUME_DELAY) {
1329                                        set_current_state(TASK_UNINTERRUPTIBLE);
1330                                        schedule_timeout(DB_FC_RESUME_DELAY);
1331                                }
1332                                spin_lock_irq(&ctx->dev->lock);
1333                                if (ctx->dev->db_state != FLOW_CONTROL)
1334                                        break;
1335                        }
1336                }
1337        }
1338out:
1339        if (ctx->dev->db_state != NORMAL)
1340                ctx->dev->rdev.stats.db_fc_interruptions++;
1341        spin_unlock_irq(&ctx->dev->lock);
1342}
1343
1344struct qp_list {
1345        unsigned idx;
1346        struct c4iw_qp **qps;
1347};
1348
1349static int add_and_ref_qp(int id, void *p, void *data)
1350{
1351        struct qp_list *qp_listp = data;
1352        struct c4iw_qp *qp = p;
1353
1354        c4iw_qp_add_ref(&qp->ibqp);
1355        qp_listp->qps[qp_listp->idx++] = qp;
1356        return 0;
1357}
1358
1359static int count_qps(int id, void *p, void *data)
1360{
1361        unsigned *countp = data;
1362        (*countp)++;
1363        return 0;
1364}
1365
1366static void deref_qps(struct qp_list *qp_list)
1367{
1368        int idx;
1369
1370        for (idx = 0; idx < qp_list->idx; idx++)
1371                c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
1372}
1373
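    /*
     * For each QP in the snapshot, resynchronize the adapter's SQ and RQ
     * producer indices with the host copies via cxgb4_sync_txq_pidx(),
     * since the doorbell writes carrying those updates were dropped.  The
     * sync is done under the device and QP locks, and after each QP the
     * doorbell FIFO is given time to drain before moving on.
     */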
1374static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
1375{
1376        int idx;
1377        int ret;
1378
1379        for (idx = 0; idx < qp_list->idx; idx++) {
1380                struct c4iw_qp *qp = qp_list->qps[idx];
1381
1382                spin_lock_irq(&qp->rhp->lock);
1383                spin_lock(&qp->lock);
1384                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1385                                          qp->wq.sq.qid,
1386                                          t4_sq_host_wq_pidx(&qp->wq),
1387                                          t4_sq_wq_size(&qp->wq));
1388                if (ret) {
1389                        pr_err(MOD "%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
1390                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
1393                        spin_unlock(&qp->lock);
1394                        spin_unlock_irq(&qp->rhp->lock);
1395                        return;
1396                }
1397                qp->wq.sq.wq_pidx_inc = 0;
1398
1399                ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
1400                                          qp->wq.rq.qid,
1401                                          t4_rq_host_wq_pidx(&qp->wq),
1402                                          t4_rq_wq_size(&qp->wq));
1403
1404                if (ret) {
1405                        pr_err(MOD "%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
1406                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
1409                        spin_unlock(&qp->lock);
1410                        spin_unlock_irq(&qp->rhp->lock);
1411                        return;
1412                }
1413                qp->wq.rq.wq_pidx_inc = 0;
1414                spin_unlock(&qp->lock);
1415                spin_unlock_irq(&qp->rhp->lock);
1416
1417                /* Wait for the dbfifo to drain */
1418                while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
1419                        set_current_state(TASK_UNINTERRUPTIBLE);
1420                        schedule_timeout(usecs_to_jiffies(10));
1421                }
1422        }
1423}
1424
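    /*
     * Full recovery after the adapter has dropped doorbells
     * (CXGB4_CONTROL_DB_DROP): back off briefly, flush the SGE EQ context
     * cache, move the device to the RECOVERY state, snapshot and reference
     * every QP under the device lock, then replay the lost producer-index
     * updates outside the lock via recover_lost_dbs().  The state is left
     * at STOPPED so a subsequent DB_EMPTY event resumes the queues.
     */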
1425static void recover_queues(struct uld_ctx *ctx)
1426{
1427        int count = 0;
1428        struct qp_list qp_list;
1429        int ret;
1430
1431        /* slow everybody down */
1432        set_current_state(TASK_UNINTERRUPTIBLE);
1433        schedule_timeout(usecs_to_jiffies(1000));
1434
1435        /* flush the SGE contexts */
1436        ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
1437        if (ret) {
1438                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1439                       pci_name(ctx->lldi.pdev));
1440                return;
1441        }
1442
1443        /* Count active queues so we can build a list of queues to recover */
1444        spin_lock_irq(&ctx->dev->lock);
1445        WARN_ON(ctx->dev->db_state != STOPPED);
1446        ctx->dev->db_state = RECOVERY;
1447        idr_for_each(&ctx->dev->qpidr, count_qps, &count);
1448
1449        qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC);
1450        if (!qp_list.qps) {
1451                printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1452                       pci_name(ctx->lldi.pdev));
1453                spin_unlock_irq(&ctx->dev->lock);
1454                return;
1455        }
1456        qp_list.idx = 0;
1457
1458        /* add and ref each qp so it doesn't get freed */
1459        idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
1460
1461        spin_unlock_irq(&ctx->dev->lock);
1462
1463        /* now traverse the list in a safe context to recover the db state */
1464        recover_lost_dbs(ctx, &qp_list);
1465
1466        /* we're almost done!  deref the qps and clean up */
1467        deref_qps(&qp_list);
1468        kfree(qp_list.qps);
1469
1470        spin_lock_irq(&ctx->dev->lock);
1471        WARN_ON(ctx->dev->db_state != RECOVERY);
1472        ctx->dev->db_state = STOPPED;
1473        spin_unlock_irq(&ctx->dev->lock);
1474}
1475
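    /*
     * Control hook called by the cxgb4 LLD for doorbell FIFO events:
     * DB_FULL stops the queues, DB_EMPTY resumes them under flow control,
     * and DB_DROP runs full doorbell recovery.  Each event is counted in
     * the rdev statistics.
     */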
1476static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
1477{
1478        struct uld_ctx *ctx = handle;
1479
1480        switch (control) {
1481        case CXGB4_CONTROL_DB_FULL:
1482                stop_queues(ctx);
1483                ctx->dev->rdev.stats.db_full++;
1484                break;
1485        case CXGB4_CONTROL_DB_EMPTY:
1486                resume_queues(ctx);
1487                mutex_lock(&ctx->dev->rdev.stats.lock);
1488                ctx->dev->rdev.stats.db_empty++;
1489                mutex_unlock(&ctx->dev->rdev.stats.lock);
1490                break;
1491        case CXGB4_CONTROL_DB_DROP:
1492                recover_queues(ctx);
1493                mutex_lock(&ctx->dev->rdev.stats.lock);
1494                ctx->dev->rdev.stats.db_drop++;
1495                mutex_unlock(&ctx->dev->rdev.stats.lock);
1496                break;
1497        default:
1498                printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
1499                       pci_name(ctx->lldi.pdev), control);
1500                break;
1501        }
1502        return 0;
1503}
1504
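    /*
     * ULD registration table handed to cxgb4; the LLD calls back through
     * these entry points for device add, ingress CPLs, adapter state
     * changes and the control events handled above.
     */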
1505static struct cxgb4_uld_info c4iw_uld_info = {
1506        .name = DRV_NAME,
1507        .add = c4iw_uld_add,
1508        .rx_handler = c4iw_uld_rx_handler,
1509        .state_change = c4iw_uld_state_change,
1510        .control = c4iw_uld_control,
1511};
1512
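    /*
     * Module init: bring up the iWARP connection manager, create the
     * debugfs root (failure is only warned about), register the RDMA
     * netlink client and iWARP port mapper, and finally register with
     * cxgb4 as an RDMA ULD so adapters are reported via c4iw_uld_add().
     */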
1513static int __init c4iw_init_module(void)
1514{
1515        int err;
1516
1517        err = c4iw_cm_init();
1518        if (err)
1519                return err;
1520
1521        c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
1522        if (!c4iw_debugfs_root)
1523                printk(KERN_WARNING MOD
1524                       "could not create debugfs entry, continuing\n");
1525
1526        if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
1527                            c4iw_nl_cb_table))
1528                pr_err("%s[%u]: Failed to add netlink callback\n",
1529                       __func__, __LINE__);
1530
1531        err = iwpm_init(RDMA_NL_C4IW);
1532        if (err) {
1533                pr_err("port mapper initialization failed with %d\n", err);
1534                ibnl_remove_client(RDMA_NL_C4IW);
1535                c4iw_cm_term();
1536                debugfs_remove_recursive(c4iw_debugfs_root);
1537                return err;
1538        }
1539
1540        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
1541
1542        return 0;
1543}
1544
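    /*
     * Module exit: tear down in roughly the reverse order of init - remove
     * each per-adapter context, unregister the ULD so cxgb4 stops calling
     * in, shut down the port mapper and netlink client, terminate the CM
     * and remove the debugfs tree.
     */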
1545static void __exit c4iw_exit_module(void)
1546{
1547        struct uld_ctx *ctx, *tmp;
1548
1549        mutex_lock(&dev_mutex);
1550        list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
1551                if (ctx->dev)
1552                        c4iw_remove(ctx);
1553                kfree(ctx);
1554        }
1555        mutex_unlock(&dev_mutex);
1556        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
1557        iwpm_exit(RDMA_NL_C4IW);
1558        ibnl_remove_client(RDMA_NL_C4IW);
1559        c4iw_cm_term();
1560        debugfs_remove_recursive(c4iw_debugfs_root);
1561}
1562
1563module_init(c4iw_init_module);
1564module_exit(c4iw_exit_module);
1565