linux/drivers/infiniband/hw/hfi1/opfn.c
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"

#define IB_BTHE_E                 BIT(IB_BTHE_E_SHIFT)

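/*
 * Each extended verbs capability code maps to one bit in the opfn
 * requested/completed bitmasks: OPFN_CODE(code) is BIT(code - 1), so
 * OPFN_MASK(TID_RDMA) expands to the bit for STL_VERBS_EXTD_TID_RDMA.
 */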
#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)

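/*
 * Per-capability handler set: generate a request payload, handle an
 * incoming response, process the reply on the requesting side, and clean
 * up on error.
 */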
struct hfi1_opfn_type {
        bool (*request)(struct rvt_qp *qp, u64 *data);
        bool (*response)(struct rvt_qp *qp, u64 *data);
        bool (*reply)(struct rvt_qp *qp, u64 data);
        void (*error)(struct rvt_qp *qp);
};

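/* Handler table indexed by capability code; only TID RDMA is registered */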
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
        [STL_VERBS_EXTD_TID_RDMA] = {
                .request = tid_rdma_conn_req,
                .response = tid_rdma_conn_resp,
                .reply = tid_rdma_conn_reply,
                .error = tid_rdma_conn_error,
        },
};

static struct workqueue_struct *opfn_wq;

static void opfn_schedule_conn_request(struct rvt_qp *qp);

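/* True if the BTH extended (IB_BTHE_E) bit is set in bth1 */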
static bool hfi1_opfn_extended(u32 bth1)
{
        return !!(bth1 & IB_BTHE_E);
}

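/*
 * Pick the next capability that has been requested but not yet completed,
 * ask its handler to generate the request data, and post it on the QP as an
 * IB_WR_OPFN atomic work request. A capability without a handler, or whose
 * request cannot be generated, is marked completed so it is not retried.
 */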
static void opfn_conn_request(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct ib_atomic_wr wr;
        u16 mask, capcode;
        struct hfi1_opfn_type *extd;
        u64 data;
        unsigned long flags;
        int ret = 0;

        trace_hfi1_opfn_state_conn_request(qp);
        spin_lock_irqsave(&priv->opfn.lock, flags);
        /*
         * Exit if the extended bit is not set, or if nothing is requested, or
         * if we have completed all requests, or if a previous request is in
         * progress
         */
        if (!priv->opfn.extended || !priv->opfn.requested ||
            priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
                goto done;

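        /*
         * Select the lowest-numbered pending capability: mask & ~(mask - 1)
         * isolates its bit and ilog2() + 1 converts that bit back to a
         * capability code.
         */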
        mask = priv->opfn.requested & ~priv->opfn.completed;
        capcode = ilog2(mask & ~(mask - 1)) + 1;
        if (capcode >= STL_VERBS_EXTD_MAX) {
                priv->opfn.completed |= OPFN_CODE(capcode);
                goto done;
        }

        extd = &hfi1_opfn_handlers[capcode];
        if (!extd || !extd->request || !extd->request(qp, &data)) {
                /*
                 * Either there is no handler for this capability or the request
                 * packet could not be generated. Either way, mark it as done so
                 * we don't keep attempting to complete it.
                 */
                priv->opfn.completed |= OPFN_CODE(capcode);
                goto done;
        }

        trace_hfi1_opfn_data_conn_request(qp, capcode, data);
        data = (data & ~0xf) | capcode;

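        /*
         * Carry the request in an IB_WR_OPFN atomic work request; the
         * capability data travels in the compare_add field.
         */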
        memset(&wr, 0, sizeof(wr));
        wr.wr.opcode = IB_WR_OPFN;
        wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
        wr.compare_add = data;

        priv->opfn.curr = capcode;      /* A new request is now in progress */
        /* Drop opfn.lock before calling ib_post_send() */
        spin_unlock_irqrestore(&priv->opfn.lock, flags);

        ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
        if (ret)
                goto err;
        trace_hfi1_opfn_state_conn_request(qp);
        return;
err:
        trace_hfi1_msg_opfn_conn_request(qp, "ib_post_send failed: ret = ",
                                         (u64)ret);
        spin_lock_irqsave(&priv->opfn.lock, flags);
        /*
         * In case of an unexpected error return from ib_post_send,
         * clear opfn.curr and reschedule to try again
         */
        priv->opfn.curr = STL_VERBS_EXTD_NONE;
        opfn_schedule_conn_request(qp);
done:
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

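/*
 * Workqueue handler: recover the owning QP from the embedded hfi1_opfn_data
 * and issue the OPFN connection request.
 */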
void opfn_send_conn_request(struct work_struct *work)
{
        struct hfi1_opfn_data *od;
        struct hfi1_qp_priv *qpriv;

        od = container_of(work, struct hfi1_opfn_data, opfn_work);
        qpriv = container_of(od, struct hfi1_qp_priv, opfn);

        opfn_conn_request(qpriv->owner);
}

/*
 * When the caller holds the QP s_lock, the OPFN request must be scheduled
 * to a different workqueue to avoid double locking the QP s_lock in the
 * call to ib_post_send() in opfn_conn_request().
 */
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        trace_hfi1_opfn_state_sched_conn_request(qp);
        queue_work(opfn_wq, &priv->opfn.opfn_work);
}

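/*
 * Responder side of an OPFN exchange. The capability code is carried in the
 * low 4 bits of the atomic compare data; the capability's response handler
 * generates the reply, which is stored in e->atomic_data for return to the
 * requester. A request for a capability that was already negotiated may mean
 * the remote side has reset, so that capability is invalidated first.
 */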
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
                        struct ib_atomic_eth *ateth)
{
        struct hfi1_qp_priv *priv = qp->priv;
        u64 data = be64_to_cpu(ateth->compare_data);
        struct hfi1_opfn_type *extd;
        u8 capcode;
        unsigned long flags;

        trace_hfi1_opfn_state_conn_response(qp);
        capcode = data & 0xf;
        trace_hfi1_opfn_data_conn_response(qp, capcode, data);
        if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
                return;

        extd = &hfi1_opfn_handlers[capcode];

        if (!extd || !extd->response) {
                e->atomic_data = capcode;
                return;
        }

        spin_lock_irqsave(&priv->opfn.lock, flags);
        if (priv->opfn.completed & OPFN_CODE(capcode)) {
                /*
                 * We are receiving a request for a feature that has already
                 * been negotiated. This may mean that the other side has reset
                 */
                priv->opfn.completed &= ~OPFN_CODE(capcode);
                if (extd->error)
                        extd->error(qp);
        }

        if (extd->response(qp, &data))
                priv->opfn.completed |= OPFN_CODE(capcode);
        e->atomic_data = (data & ~0xf) | capcode;
        trace_hfi1_opfn_state_conn_response(qp);
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

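/*
 * Requester side: process the reply to the current OPFN request. On success
 * the capability is marked completed; opfn.curr is then cleared to indicate
 * that no request is in progress any more.
 */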
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_opfn_type *extd;
        u8 capcode;
        unsigned long flags;

        trace_hfi1_opfn_state_conn_reply(qp);
        capcode = data & 0xf;
        trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
        if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
                return;

        spin_lock_irqsave(&priv->opfn.lock, flags);
        /*
         * Either there is no previous request or the reply is not for the
         * current request
         */
        if (!priv->opfn.curr || capcode != priv->opfn.curr)
                goto done;

        extd = &hfi1_opfn_handlers[capcode];

        if (!extd || !extd->reply)
                goto clear;

        if (extd->reply(qp, data))
                priv->opfn.completed |= OPFN_CODE(capcode);
clear:
        /*
         * Clear opfn.curr to indicate that the previous request is no longer in
         * progress
         */
        priv->opfn.curr = STL_VERBS_EXTD_NONE;
        trace_hfi1_opfn_state_conn_reply(qp);
done:
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

void opfn_conn_error(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_opfn_type *extd = NULL;
        unsigned long flags;
        u16 capcode;

        trace_hfi1_opfn_state_conn_error(qp);
        trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
        /*
         * The QP has gone into the Error state. We have to invalidate all
         * negotiated features, including the one in progress (if any). The RC
         * QP handling will clean the WQE for the connection request.
         */
        spin_lock_irqsave(&priv->opfn.lock, flags);
        while (priv->opfn.completed) {
                /* capcode here holds the isolated bit, not the capability code */
                capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
                extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
                if (extd->error)
                        extd->error(qp);
                priv->opfn.completed &= ~capcode;
        }
        priv->opfn.extended = 0;
        priv->opfn.requested = 0;
        priv->opfn.curr = STL_VERBS_EXTD_NONE;
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

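/*
 * Apply QP attribute changes that affect OPFN. For RC QPs with the TID_RDMA
 * capability enabled and a 4096- or 8192-byte path MTU, the local TID RDMA
 * parameters are (re)initialized; on the transition to RTS, TID RDMA is
 * marked as requested, and a previously completed negotiation is scheduled
 * for renegotiation.
 */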
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_qp_priv *priv = qp->priv;
        unsigned long flags;

        if (attr_mask & IB_QP_RETRY_CNT)
                priv->s_retry = attr->retry_cnt;

        spin_lock_irqsave(&priv->opfn.lock, flags);
        if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
                struct tid_rdma_params *local = &priv->tid_rdma.local;

                if (attr_mask & IB_QP_TIMEOUT)
                        priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
                if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
                    qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
                        tid_rdma_opfn_init(qp, local);
                        /*
                         * We only want to set the OPFN requested bit when the
                         * QP transitions to RTS.
                         */
                        if (attr_mask & IB_QP_STATE &&
                            attr->qp_state == IB_QPS_RTS) {
                                priv->opfn.requested |= OPFN_MASK(TID_RDMA);
                                /*
                                 * If the QP is transitioning to RTS and the
                                 * opfn.completed for TID RDMA has already been
                                 * set, the QP is being moved *back* into RTS.
                                 * We can now renegotiate the TID RDMA
                                 * parameters.
                                 */
                                if (priv->opfn.completed &
                                    OPFN_MASK(TID_RDMA)) {
                                        priv->opfn.completed &=
                                                ~OPFN_MASK(TID_RDMA);
                                        /*
                                         * Since the opfn.completed bit was
                                         * already set, it is safe to assume
                                         * that the opfn.extended is also set.
                                         */
                                        opfn_schedule_conn_request(qp);
                                }
                        }
                } else {
                        memset(local, 0, sizeof(*local));
                }
        }
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

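/*
 * If the incoming bth1 has the extended bit set and OPFN is enabled, note
 * that the remote end supports OPFN extensions and, if the QP is already in
 * RTS, issue a connection request right away.
 */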
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
            HFI1_CAP_IS_KSET(OPFN)) {
                priv->opfn.extended = 1;
                if (qp->state == IB_QPS_RTS)
                        opfn_conn_request(qp);
        }
}

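/*
 * Create the workqueue on which OPFN connection requests are issued outside
 * of the caller's QP s_lock.
 */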
int opfn_init(void)
{
        opfn_wq = alloc_workqueue("hfi_opfn",
                                  WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
                                  WQ_MEM_RECLAIM,
                                  HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
        if (!opfn_wq)
                return -ENOMEM;

        return 0;
}

void opfn_exit(void)
{
        if (opfn_wq) {
                destroy_workqueue(opfn_wq);
                opfn_wq = NULL;
        }
}