linux/drivers/infiniband/hw/efa/efa_com.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
   2/*
   3 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
   4 */
   5
   6#include "efa_com.h"
   7#include "efa_regs_defs.h"
   8
/*
 * Fallback admin command completion timeout, applied by efa_com_admin_init()
 * when the device capability register reports a zero timeout.
 */
#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */

/* NOTE(review): not referenced in this part of the file; presumably the
 * default for mmio_read_timeout - confirm against the MMIO read init code.
 */
#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
/* Value efa_com_reg_read32() returns when a register read fails */
#define EFA_MMIO_READ_INVALID 0xffffffff

/* Sleep between admin CQ polls while the queue is in polling mode */
#define EFA_POLL_INTERVAL_MS 100 /* msecs */

/*
 * Queue depths. The admin queue depth minus one is used as an index mask
 * throughout this file, so it has to stay a power of two.
 */
#define EFA_ASYNC_QUEUE_DEPTH 16
#define EFA_ADMIN_QUEUE_DEPTH 32

/* Admin API version packed in the layout of the device version register */
#define MIN_EFA_VER\
        ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
         (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))

#define EFA_CTRL_MAJOR          0
#define EFA_CTRL_MINOR          0
#define EFA_CTRL_SUB_MINOR      1

/* Controller version packed in the layout of the controller version register */
#define MIN_EFA_CTRL_VER \
        (((EFA_CTRL_MAJOR) << \
        (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
        ((EFA_CTRL_MINOR) << \
        (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
        (EFA_CTRL_SUB_MINOR))

/* Split a DMA address into the two 32-bit halves the base registers take */
#define EFA_DMA_ADDR_TO_UINT32_LOW(x)   ((u32)((u64)(x)))
#define EFA_DMA_ADDR_TO_UINT32_HIGH(x)  ((u32)(((u64)(x)) >> 32))

/* Written to the interrupt mask register when polling mode is enabled */
#define EFA_REGS_ADMIN_INTR_MASK 1
  38
/* Progress of an admin command, as recorded in its completion context */
enum efa_cmd_status {
        /* set when the command is written to the submission queue */
        EFA_CMD_SUBMITTED,
        /* set by the completion handler once the device's entry is consumed */
        EFA_CMD_COMPLETED,
};
  43
/*
 * Per-command completion context. One is captured for every admin command
 * that is in flight and released once the submitter is done with it.
 */
struct efa_comp_ctx {
        /* signalled by the completion handler when not in polling mode */
        struct completion wait_event;
        /* caller's buffer that the device completion entry is copied into */
        struct efa_admin_acq_entry *user_cqe;
        /* number of bytes copied into user_cqe */
        u32 comp_size;
        enum efa_cmd_status status;
        /* status from the device */
        u8 comp_status;
        /* opcode of the submitted command, kept for log messages */
        u8 cmd_opcode;
        /* non-zero while the context is captured by an in-flight command */
        u8 occupied;
};
  54
  55static const char *efa_com_cmd_str(u8 cmd)
  56{
  57#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
  58
  59        switch (cmd) {
  60        EFA_CMD_STR_CASE(CREATE_QP);
  61        EFA_CMD_STR_CASE(MODIFY_QP);
  62        EFA_CMD_STR_CASE(QUERY_QP);
  63        EFA_CMD_STR_CASE(DESTROY_QP);
  64        EFA_CMD_STR_CASE(CREATE_AH);
  65        EFA_CMD_STR_CASE(DESTROY_AH);
  66        EFA_CMD_STR_CASE(REG_MR);
  67        EFA_CMD_STR_CASE(DEREG_MR);
  68        EFA_CMD_STR_CASE(CREATE_CQ);
  69        EFA_CMD_STR_CASE(DESTROY_CQ);
  70        EFA_CMD_STR_CASE(GET_FEATURE);
  71        EFA_CMD_STR_CASE(SET_FEATURE);
  72        EFA_CMD_STR_CASE(GET_STATS);
  73        EFA_CMD_STR_CASE(ALLOC_PD);
  74        EFA_CMD_STR_CASE(DEALLOC_PD);
  75        EFA_CMD_STR_CASE(ALLOC_UAR);
  76        EFA_CMD_STR_CASE(DEALLOC_UAR);
  77        default: return "unknown command opcode";
  78        }
  79#undef EFA_CMD_STR_CASE
  80}
  81
  82static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
  83{
  84        struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
  85        struct efa_admin_mmio_req_read_less_resp *read_resp;
  86        unsigned long exp_time;
  87        u32 mmio_read_reg;
  88        u32 err;
  89
  90        read_resp = mmio_read->read_resp;
  91
  92        spin_lock(&mmio_read->lock);
  93        mmio_read->seq_num++;
  94
  95        /* trash DMA req_id to identify when hardware is done */
  96        read_resp->req_id = mmio_read->seq_num + 0x9aL;
  97        mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
  98                        EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
  99        mmio_read_reg |= mmio_read->seq_num &
 100                         EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
 101
 102        writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
 103
 104        exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
 105        do {
 106                if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
 107                        break;
 108                udelay(1);
 109        } while (time_is_after_jiffies(exp_time));
 110
 111        if (read_resp->req_id != mmio_read->seq_num) {
 112                ibdev_err_ratelimited(
 113                        edev->efa_dev,
 114                        "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
 115                        mmio_read->seq_num, offset, read_resp->req_id,
 116                        read_resp->reg_off);
 117                err = EFA_MMIO_READ_INVALID;
 118                goto out;
 119        }
 120
 121        if (read_resp->reg_off != offset) {
 122                ibdev_err_ratelimited(
 123                        edev->efa_dev,
 124                        "Reading register failed: wrong offset provided\n");
 125                err = EFA_MMIO_READ_INVALID;
 126                goto out;
 127        }
 128
 129        err = read_resp->reg_val;
 130out:
 131        spin_unlock(&mmio_read->lock);
 132        return err;
 133}
 134
 135static int efa_com_admin_init_sq(struct efa_com_dev *edev)
 136{
 137        struct efa_com_admin_queue *aq = &edev->aq;
 138        struct efa_com_admin_sq *sq = &aq->sq;
 139        u16 size = aq->depth * sizeof(*sq->entries);
 140        u32 addr_high;
 141        u32 addr_low;
 142        u32 aq_caps;
 143
 144        sq->entries =
 145                dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
 146        if (!sq->entries)
 147                return -ENOMEM;
 148
 149        spin_lock_init(&sq->lock);
 150
 151        sq->cc = 0;
 152        sq->pc = 0;
 153        sq->phase = 1;
 154
 155        sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
 156
 157        addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
 158        addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
 159
 160        writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
 161        writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
 162
 163        aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
 164        aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
 165                        EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
 166                        EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
 167
 168        writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
 169
 170        return 0;
 171}
 172
 173static int efa_com_admin_init_cq(struct efa_com_dev *edev)
 174{
 175        struct efa_com_admin_queue *aq = &edev->aq;
 176        struct efa_com_admin_cq *cq = &aq->cq;
 177        u16 size = aq->depth * sizeof(*cq->entries);
 178        u32 addr_high;
 179        u32 addr_low;
 180        u32 acq_caps;
 181
 182        cq->entries =
 183                dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
 184        if (!cq->entries)
 185                return -ENOMEM;
 186
 187        spin_lock_init(&cq->lock);
 188
 189        cq->cc = 0;
 190        cq->phase = 1;
 191
 192        addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
 193        addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
 194
 195        writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
 196        writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
 197
 198        acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
 199        acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
 200                        EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
 201                        EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
 202        acq_caps |= (aq->msix_vector_idx <<
 203                        EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
 204                        EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
 205
 206        writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
 207
 208        return 0;
 209}
 210
 211static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
 212                                   struct efa_aenq_handlers *aenq_handlers)
 213{
 214        struct efa_com_aenq *aenq = &edev->aenq;
 215        u32 addr_low, addr_high, aenq_caps;
 216        u16 size;
 217
 218        if (!aenq_handlers) {
 219                ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
 220                return -EINVAL;
 221        }
 222
 223        size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
 224        aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
 225                                           GFP_KERNEL);
 226        if (!aenq->entries)
 227                return -ENOMEM;
 228
 229        aenq->aenq_handlers = aenq_handlers;
 230        aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
 231        aenq->cc = 0;
 232        aenq->phase = 1;
 233
 234        addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
 235        addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
 236
 237        writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
 238        writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
 239
 240        aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
 241        aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
 242                EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
 243                EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
 244        aenq_caps |= (aenq->msix_vector_idx
 245                      << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
 246                     EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
 247        writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
 248
 249        /*
 250         * Init cons_db to mark that all entries in the queue
 251         * are initially available
 252         */
 253        writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
 254
 255        return 0;
 256}
 257
 258/* ID to be used with efa_com_get_comp_ctx */
 259static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
 260{
 261        u16 ctx_id;
 262
 263        spin_lock(&aq->comp_ctx_lock);
 264        ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
 265        aq->comp_ctx_pool_next++;
 266        spin_unlock(&aq->comp_ctx_lock);
 267
 268        return ctx_id;
 269}
 270
 271static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
 272                                   u16 ctx_id)
 273{
 274        spin_lock(&aq->comp_ctx_lock);
 275        aq->comp_ctx_pool_next--;
 276        aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
 277        spin_unlock(&aq->comp_ctx_lock);
 278}
 279
 280static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
 281                                        struct efa_comp_ctx *comp_ctx)
 282{
 283        u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
 284                     EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
 285        u16 ctx_id = cmd_id & (aq->depth - 1);
 286
 287        ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
 288        comp_ctx->occupied = 0;
 289        efa_com_dealloc_ctx_id(aq, ctx_id);
 290}
 291
 292static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
 293                                                 u16 cmd_id, bool capture)
 294{
 295        u16 ctx_id = cmd_id & (aq->depth - 1);
 296
 297        if (aq->comp_ctx[ctx_id].occupied && capture) {
 298                ibdev_err_ratelimited(
 299                        aq->efa_dev,
 300                        "Completion context for command_id %#x is occupied\n",
 301                        cmd_id);
 302                return NULL;
 303        }
 304
 305        if (capture) {
 306                aq->comp_ctx[ctx_id].occupied = 1;
 307                ibdev_dbg(aq->efa_dev,
 308                          "Take completion ctxt for command_id %#x\n", cmd_id);
 309        }
 310
 311        return &aq->comp_ctx[ctx_id];
 312}
 313
 314static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
 315                                                       struct efa_admin_aq_entry *cmd,
 316                                                       size_t cmd_size_in_bytes,
 317                                                       struct efa_admin_acq_entry *comp,
 318                                                       size_t comp_size_in_bytes)
 319{
 320        struct efa_admin_aq_entry *aqe;
 321        struct efa_comp_ctx *comp_ctx;
 322        u16 queue_size_mask;
 323        u16 cmd_id;
 324        u16 ctx_id;
 325        u16 pi;
 326
 327        queue_size_mask = aq->depth - 1;
 328        pi = aq->sq.pc & queue_size_mask;
 329
 330        ctx_id = efa_com_alloc_ctx_id(aq);
 331
 332        /* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
 333        cmd_id = ctx_id & queue_size_mask;
 334        cmd_id |= aq->sq.pc & ~queue_size_mask;
 335        cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
 336
 337        cmd->aq_common_descriptor.command_id = cmd_id;
 338        cmd->aq_common_descriptor.flags |= aq->sq.phase &
 339                EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
 340
 341        comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
 342        if (!comp_ctx) {
 343                efa_com_dealloc_ctx_id(aq, ctx_id);
 344                return ERR_PTR(-EINVAL);
 345        }
 346
 347        comp_ctx->status = EFA_CMD_SUBMITTED;
 348        comp_ctx->comp_size = comp_size_in_bytes;
 349        comp_ctx->user_cqe = comp;
 350        comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
 351
 352        reinit_completion(&comp_ctx->wait_event);
 353
 354        aqe = &aq->sq.entries[pi];
 355        memset(aqe, 0, sizeof(*aqe));
 356        memcpy(aqe, cmd, cmd_size_in_bytes);
 357
 358        aq->sq.pc++;
 359        atomic64_inc(&aq->stats.submitted_cmd);
 360
 361        if ((aq->sq.pc & queue_size_mask) == 0)
 362                aq->sq.phase = !aq->sq.phase;
 363
 364        /* barrier not needed in case of writel */
 365        writel(aq->sq.pc, aq->sq.db_addr);
 366
 367        return comp_ctx;
 368}
 369
 370static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
 371{
 372        size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
 373        size_t size = aq->depth * sizeof(struct efa_comp_ctx);
 374        struct efa_comp_ctx *comp_ctx;
 375        u16 i;
 376
 377        aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
 378        aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
 379        if (!aq->comp_ctx || !aq->comp_ctx_pool) {
 380                devm_kfree(aq->dmadev, aq->comp_ctx_pool);
 381                devm_kfree(aq->dmadev, aq->comp_ctx);
 382                return -ENOMEM;
 383        }
 384
 385        for (i = 0; i < aq->depth; i++) {
 386                comp_ctx = efa_com_get_comp_ctx(aq, i, false);
 387                if (comp_ctx)
 388                        init_completion(&comp_ctx->wait_event);
 389
 390                aq->comp_ctx_pool[i] = i;
 391        }
 392
 393        spin_lock_init(&aq->comp_ctx_lock);
 394
 395        aq->comp_ctx_pool_next = 0;
 396
 397        return 0;
 398}
 399
 400static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
 401                                                     struct efa_admin_aq_entry *cmd,
 402                                                     size_t cmd_size_in_bytes,
 403                                                     struct efa_admin_acq_entry *comp,
 404                                                     size_t comp_size_in_bytes)
 405{
 406        struct efa_comp_ctx *comp_ctx;
 407
 408        spin_lock(&aq->sq.lock);
 409        if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
 410                ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
 411                spin_unlock(&aq->sq.lock);
 412                return ERR_PTR(-ENODEV);
 413        }
 414
 415        comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
 416                                              comp_size_in_bytes);
 417        spin_unlock(&aq->sq.lock);
 418        if (IS_ERR(comp_ctx))
 419                clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
 420
 421        return comp_ctx;
 422}
 423
 424static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
 425                                                   struct efa_admin_acq_entry *cqe)
 426{
 427        struct efa_comp_ctx *comp_ctx;
 428        u16 cmd_id;
 429
 430        cmd_id = cqe->acq_common_descriptor.command &
 431                 EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
 432
 433        comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
 434        if (!comp_ctx) {
 435                ibdev_err(aq->efa_dev,
 436                          "comp_ctx is NULL. Changing the admin queue running state\n");
 437                clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
 438                return;
 439        }
 440
 441        comp_ctx->status = EFA_CMD_COMPLETED;
 442        comp_ctx->comp_status = cqe->acq_common_descriptor.status;
 443        if (comp_ctx->user_cqe)
 444                memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
 445
 446        if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
 447                complete(&comp_ctx->wait_event);
 448}
 449
 450static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
 451{
 452        struct efa_admin_acq_entry *cqe;
 453        u16 queue_size_mask;
 454        u16 comp_num = 0;
 455        u8 phase;
 456        u16 ci;
 457
 458        queue_size_mask = aq->depth - 1;
 459
 460        ci = aq->cq.cc & queue_size_mask;
 461        phase = aq->cq.phase;
 462
 463        cqe = &aq->cq.entries[ci];
 464
 465        /* Go over all the completions */
 466        while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
 467                EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
 468                /*
 469                 * Do not read the rest of the completion entry before the
 470                 * phase bit was validated
 471                 */
 472                dma_rmb();
 473                efa_com_handle_single_admin_completion(aq, cqe);
 474
 475                ci++;
 476                comp_num++;
 477                if (ci == aq->depth) {
 478                        ci = 0;
 479                        phase = !phase;
 480                }
 481
 482                cqe = &aq->cq.entries[ci];
 483        }
 484
 485        aq->cq.cc += comp_num;
 486        aq->cq.phase = phase;
 487        aq->sq.cc += comp_num;
 488        atomic64_add(comp_num, &aq->stats.completed_cmd);
 489}
 490
 491static int efa_com_comp_status_to_errno(u8 comp_status)
 492{
 493        switch (comp_status) {
 494        case EFA_ADMIN_SUCCESS:
 495                return 0;
 496        case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
 497                return -ENOMEM;
 498        case EFA_ADMIN_UNSUPPORTED_OPCODE:
 499                return -EOPNOTSUPP;
 500        case EFA_ADMIN_BAD_OPCODE:
 501        case EFA_ADMIN_MALFORMED_REQUEST:
 502        case EFA_ADMIN_ILLEGAL_PARAMETER:
 503        case EFA_ADMIN_UNKNOWN_ERROR:
 504                return -EINVAL;
 505        default:
 506                return -EINVAL;
 507        }
 508}
 509
 510static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
 511                                                     struct efa_com_admin_queue *aq)
 512{
 513        unsigned long timeout;
 514        unsigned long flags;
 515        int err;
 516
 517        timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
 518
 519        while (1) {
 520                spin_lock_irqsave(&aq->cq.lock, flags);
 521                efa_com_handle_admin_completion(aq);
 522                spin_unlock_irqrestore(&aq->cq.lock, flags);
 523
 524                if (comp_ctx->status != EFA_CMD_SUBMITTED)
 525                        break;
 526
 527                if (time_is_before_jiffies(timeout)) {
 528                        ibdev_err_ratelimited(
 529                                aq->efa_dev,
 530                                "Wait for completion (polling) timeout\n");
 531                        /* EFA didn't have any completion */
 532                        atomic64_inc(&aq->stats.no_completion);
 533
 534                        clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
 535                        err = -ETIME;
 536                        goto out;
 537                }
 538
 539                msleep(aq->poll_interval);
 540        }
 541
 542        err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
 543out:
 544        efa_com_put_comp_ctx(aq, comp_ctx);
 545        return err;
 546}
 547
 548static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
 549                                                        struct efa_com_admin_queue *aq)
 550{
 551        unsigned long flags;
 552        int err;
 553
 554        wait_for_completion_timeout(&comp_ctx->wait_event,
 555                                    usecs_to_jiffies(aq->completion_timeout));
 556
 557        /*
 558         * In case the command wasn't completed find out the root cause.
 559         * There might be 2 kinds of errors
 560         * 1) No completion (timeout reached)
 561         * 2) There is completion but the device didn't get any msi-x interrupt.
 562         */
 563        if (comp_ctx->status == EFA_CMD_SUBMITTED) {
 564                spin_lock_irqsave(&aq->cq.lock, flags);
 565                efa_com_handle_admin_completion(aq);
 566                spin_unlock_irqrestore(&aq->cq.lock, flags);
 567
 568                atomic64_inc(&aq->stats.no_completion);
 569
 570                if (comp_ctx->status == EFA_CMD_COMPLETED)
 571                        ibdev_err_ratelimited(
 572                                aq->efa_dev,
 573                                "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
 574                                efa_com_cmd_str(comp_ctx->cmd_opcode),
 575                                comp_ctx->cmd_opcode, comp_ctx->status,
 576                                comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
 577                else
 578                        ibdev_err_ratelimited(
 579                                aq->efa_dev,
 580                                "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
 581                                efa_com_cmd_str(comp_ctx->cmd_opcode),
 582                                comp_ctx->cmd_opcode, comp_ctx->status,
 583                                comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
 584
 585                clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
 586                err = -ETIME;
 587                goto out;
 588        }
 589
 590        err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
 591out:
 592        efa_com_put_comp_ctx(aq, comp_ctx);
 593        return err;
 594}
 595
 596/*
 597 * There are two types to wait for completion.
 598 * Polling mode - wait until the completion is available.
 599 * Async mode - wait on wait queue until the completion is ready
 600 * (or the timeout expired).
 601 * It is expected that the IRQ called efa_com_handle_admin_completion
 602 * to mark the completions.
 603 */
 604static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
 605                                             struct efa_com_admin_queue *aq)
 606{
 607        if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
 608                return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
 609
 610        return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
 611}
 612
 613/**
 614 * efa_com_cmd_exec - Execute admin command
 615 * @aq: admin queue.
 616 * @cmd: the admin command to execute.
 617 * @cmd_size: the command size.
 618 * @comp: command completion return entry.
 619 * @comp_size: command completion size.
 620 * Submit an admin command and then wait until the device will return a
 621 * completion.
 622 * The completion will be copied into comp.
 623 *
 624 * @return - 0 on success, negative value on failure.
 625 */
 626int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
 627                     struct efa_admin_aq_entry *cmd,
 628                     size_t cmd_size,
 629                     struct efa_admin_acq_entry *comp,
 630                     size_t comp_size)
 631{
 632        struct efa_comp_ctx *comp_ctx;
 633        int err;
 634
 635        might_sleep();
 636
 637        /* In case of queue FULL */
 638        down(&aq->avail_cmds);
 639
 640        ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
 641                  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
 642                  cmd->aq_common_descriptor.opcode);
 643        comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
 644        if (IS_ERR(comp_ctx)) {
 645                ibdev_err_ratelimited(
 646                        aq->efa_dev,
 647                        "Failed to submit command %s (opcode %u) err %ld\n",
 648                        efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
 649                        cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
 650
 651                up(&aq->avail_cmds);
 652                return PTR_ERR(comp_ctx);
 653        }
 654
 655        err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
 656        if (err)
 657                ibdev_err_ratelimited(
 658                        aq->efa_dev,
 659                        "Failed to process command %s (opcode %u) comp_status %d err %d\n",
 660                        efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
 661                        cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
 662                        err);
 663
 664        up(&aq->avail_cmds);
 665
 666        return err;
 667}
 668
 669/**
 670 * efa_com_admin_destroy - Destroy the admin and the async events queues.
 671 * @edev: EFA communication layer struct
 672 */
 673void efa_com_admin_destroy(struct efa_com_dev *edev)
 674{
 675        struct efa_com_admin_queue *aq = &edev->aq;
 676        struct efa_com_aenq *aenq = &edev->aenq;
 677        struct efa_com_admin_cq *cq = &aq->cq;
 678        struct efa_com_admin_sq *sq = &aq->sq;
 679        u16 size;
 680
 681        clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
 682
 683        devm_kfree(edev->dmadev, aq->comp_ctx_pool);
 684        devm_kfree(edev->dmadev, aq->comp_ctx);
 685
 686        size = aq->depth * sizeof(*sq->entries);
 687        dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
 688
 689        size = aq->depth * sizeof(*cq->entries);
 690        dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
 691
 692        size = aenq->depth * sizeof(*aenq->entries);
 693        dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
 694}
 695
 696/**
 697 * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
 698 * @edev: EFA communication layer struct
 699 * @polling: Enable/Disable polling mode
 700 *
 701 * Set the admin completion mode.
 702 */
 703void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
 704{
 705        u32 mask_value = 0;
 706
 707        if (polling)
 708                mask_value = EFA_REGS_ADMIN_INTR_MASK;
 709
 710        writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
 711        if (polling)
 712                set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
 713        else
 714                clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
 715}
 716
 717static void efa_com_stats_init(struct efa_com_dev *edev)
 718{
 719        atomic64_t *s = (atomic64_t *)&edev->aq.stats;
 720        int i;
 721
 722        for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
 723                atomic64_set(s, 0);
 724}
 725
 726/**
 727 * efa_com_admin_init - Init the admin and the async queues
 728 * @edev: EFA communication layer struct
 729 * @aenq_handlers: Those handlers to be called upon event.
 730 *
 731 * Initialize the admin submission and completion queues.
 732 * Initialize the asynchronous events notification queues.
 733 *
 734 * @return - 0 on success, negative value on failure.
 735 */
 736int efa_com_admin_init(struct efa_com_dev *edev,
 737                       struct efa_aenq_handlers *aenq_handlers)
 738{
 739        struct efa_com_admin_queue *aq = &edev->aq;
 740        u32 timeout;
 741        u32 dev_sts;
 742        u32 cap;
 743        int err;
 744
 745        dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
 746        if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
 747                ibdev_err(edev->efa_dev,
 748                          "Device isn't ready, abort com init %#x\n", dev_sts);
 749                return -ENODEV;
 750        }
 751
 752        aq->depth = EFA_ADMIN_QUEUE_DEPTH;
 753
 754        aq->dmadev = edev->dmadev;
 755        aq->efa_dev = edev->efa_dev;
 756        set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
 757
 758        sema_init(&aq->avail_cmds, aq->depth);
 759
 760        efa_com_stats_init(edev);
 761
 762        err = efa_com_init_comp_ctxt(aq);
 763        if (err)
 764                return err;
 765
 766        err = efa_com_admin_init_sq(edev);
 767        if (err)
 768                goto err_destroy_comp_ctxt;
 769
 770        err = efa_com_admin_init_cq(edev);
 771        if (err)
 772                goto err_destroy_sq;
 773
 774        efa_com_set_admin_polling_mode(edev, false);
 775
 776        err = efa_com_admin_init_aenq(edev, aenq_handlers);
 777        if (err)
 778                goto err_destroy_cq;
 779
 780        cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
 781        timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
 782                  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
 783        if (timeout)
 784                /* the resolution of timeout reg is 100ms */
 785                aq->completion_timeout = timeout * 100000;
 786        else
 787                aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
 788
 789        aq->poll_interval = EFA_POLL_INTERVAL_MS;
 790
 791        set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
 792
 793        return 0;
 794
 795err_destroy_cq:
 796        dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
 797                          aq->cq.entries, aq->cq.dma_addr);
 798err_destroy_sq:
 799        dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
 800                          aq->sq.entries, aq->sq.dma_addr);
 801err_destroy_comp_ctxt:
 802        devm_kfree(edev->dmadev, aq->comp_ctx);
 803
 804        return err;
 805}
 806
 807/**
 808 * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
 809 * @edev: EFA communication layer struct
 810 *
 811 * This method goes over the admin completion queue and wakes up
 812 * all the pending threads that wait on the commands wait event.
 813 *
 814 * @note: Should be called after MSI-X interrupt.
 815 */
 816void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
 817{
 818        unsigned long flags;
 819
 820        spin_lock_irqsave(&edev->aq.cq.lock, flags);
 821        efa_com_handle_admin_completion(&edev->aq);
 822        spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
 823}
 824
 825/*
 826 * efa_handle_specific_aenq_event:
 827 * return the handler that is relevant to the specific event group
 828 */
 829static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
 830                                                     u16 group)
 831{
 832        struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
 833
 834        if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
 835                return aenq_handlers->handlers[group];
 836
 837        return aenq_handlers->unimplemented_handler;
 838}
 839
 840/**
 841 * efa_com_aenq_intr_handler - AENQ interrupt handler
 842 * @edev: EFA communication layer struct
 843 * @data: Data of interrupt handler.
 844 *
 845 * Go over the async event notification queue and call the proper aenq handler.
 846 */
 847void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
 848{
 849        struct efa_admin_aenq_common_desc *aenq_common;
 850        struct efa_com_aenq *aenq = &edev->aenq;
 851        struct efa_admin_aenq_entry *aenq_e;
 852        efa_aenq_handler handler_cb;
 853        u32 processed = 0;
 854        u8 phase;
 855        u32 ci;
 856
 857        ci = aenq->cc & (aenq->depth - 1);
 858        phase = aenq->phase;
 859        aenq_e = &aenq->entries[ci]; /* Get first entry */
 860        aenq_common = &aenq_e->aenq_common_desc;
 861
 862        /* Go over all the events */
 863        while ((READ_ONCE(aenq_common->flags) &
 864                EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
 865                /*
 866                 * Do not read the rest of the completion entry before the
 867                 * phase bit was validated
 868                 */
 869                dma_rmb();
 870
 871                /* Handle specific event*/
 872                handler_cb = efa_com_get_specific_aenq_cb(edev,
 873                                                          aenq_common->group);
 874                handler_cb(data, aenq_e); /* call the actual event handler*/
 875
 876                /* Get next event entry */
 877                ci++;
 878                processed++;
 879
 880                if (ci == aenq->depth) {
 881                        ci = 0;
 882                        phase = !phase;
 883                }
 884                aenq_e = &aenq->entries[ci];
 885                aenq_common = &aenq_e->aenq_common_desc;
 886        }
 887
 888        aenq->cc += processed;
 889        aenq->phase = phase;
 890
 891        /* Don't update aenq doorbell if there weren't any processed events */
 892        if (!processed)
 893                return;
 894
 895        /* barrier not needed in case of writel */
 896        writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
 897}
 898
 899static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
 900{
 901        struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
 902        u32 addr_high;
 903        u32 addr_low;
 904
 905        /* dma_addr_bits is unknown at this point */
 906        addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
 907        addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
 908
 909        writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
 910        writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
 911}
 912
 913int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
 914{
 915        struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
 916
 917        spin_lock_init(&mmio_read->lock);
 918        mmio_read->read_resp =
 919                dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
 920                                   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
 921        if (!mmio_read->read_resp)
 922                return -ENOMEM;
 923
 924        efa_com_mmio_reg_read_resp_addr_init(edev);
 925
 926        mmio_read->read_resp->req_id = 0;
 927        mmio_read->seq_num = 0;
 928        mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
 929
 930        return 0;
 931}
 932
 933void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
 934{
 935        struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
 936
 937        dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
 938                          mmio_read->read_resp, mmio_read->read_resp_dma_addr);
 939}
 940
 941int efa_com_validate_version(struct efa_com_dev *edev)
 942{
 943        u32 ctrl_ver_masked;
 944        u32 ctrl_ver;
 945        u32 ver;
 946
 947        /*
 948         * Make sure the EFA version and the controller version are at least
 949         * as the driver expects
 950         */
 951        ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
 952        ctrl_ver = efa_com_reg_read32(edev,
 953                                      EFA_REGS_CONTROLLER_VERSION_OFF);
 954
 955        ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
 956                  (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
 957                          EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
 958                  ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
 959
 960        if (ver < MIN_EFA_VER) {
 961                ibdev_err(edev->efa_dev,
 962                          "EFA version is lower than the minimal version the driver supports\n");
 963                return -EOPNOTSUPP;
 964        }
 965
 966        ibdev_dbg(edev->efa_dev,
 967                  "efa controller version: %d.%d.%d implementation version %d\n",
 968                  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
 969                          EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
 970                  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
 971                          EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
 972                  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
 973                  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
 974                          EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
 975
 976        ctrl_ver_masked =
 977                (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
 978                (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
 979                (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
 980
 981        /* Validate the ctrl version without the implementation ID */
 982        if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
 983                ibdev_err(edev->efa_dev,
 984                          "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
 985                return -EOPNOTSUPP;
 986        }
 987
 988        return 0;
 989}
 990
 991/**
 992 * efa_com_get_dma_width - Retrieve physical dma address width the device
 993 * supports.
 994 * @edev: EFA communication layer struct
 995 *
 996 * Retrieve the maximum physical address bits the device can handle.
 997 *
 998 * @return: > 0 on Success and negative value otherwise.
 999 */
1000int efa_com_get_dma_width(struct efa_com_dev *edev)
1001{
1002        u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1003        int width;
1004
1005        width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
1006                EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
1007
1008        ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
1009
1010        if (width < 32 || width > 64) {
1011                ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
1012                return -EINVAL;
1013        }
1014
1015        edev->dma_addr_bits = width;
1016
1017        return width;
1018}
1019
1020static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
1021                                u16 exp_state)
1022{
1023        u32 val, i;
1024
1025        for (i = 0; i < timeout; i++) {
1026                val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1027
1028                if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
1029                    exp_state)
1030                        return 0;
1031
1032                ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
1033                msleep(EFA_POLL_INTERVAL_MS);
1034        }
1035
1036        return -ETIME;
1037}
1038
1039/**
1040 * efa_com_dev_reset - Perform device FLR to the device.
1041 * @edev: EFA communication layer struct
1042 * @reset_reason: Specify what is the trigger for the reset in case of an error.
1043 *
1044 * @return - 0 on success, negative value on failure.
1045 */
1046int efa_com_dev_reset(struct efa_com_dev *edev,
1047                      enum efa_regs_reset_reason_types reset_reason)
1048{
1049        u32 stat, timeout, cap, reset_val;
1050        int err;
1051
1052        stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1053        cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1054
1055        if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
1056                ibdev_err(edev->efa_dev,
1057                          "Device isn't ready, can't reset device\n");
1058                return -EINVAL;
1059        }
1060
1061        timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
1062                  EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
1063        if (!timeout) {
1064                ibdev_err(edev->efa_dev, "Invalid timeout value\n");
1065                return -EINVAL;
1066        }
1067
1068        /* start reset */
1069        reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
1070        reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
1071                     EFA_REGS_DEV_CTL_RESET_REASON_MASK;
1072        writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1073
1074        /* reset clears the mmio readless address, restore it */
1075        efa_com_mmio_reg_read_resp_addr_init(edev);
1076
1077        err = wait_for_reset_state(edev, timeout,
1078                                   EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
1079        if (err) {
1080                ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
1081                return err;
1082        }
1083
1084        /* reset done */
1085        writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1086        err = wait_for_reset_state(edev, timeout, 0);
1087        if (err) {
1088                ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
1089                return err;
1090        }
1091
1092        timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
1093                  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
1094        if (timeout)
1095                /* the resolution of timeout reg is 100ms */
1096                edev->aq.completion_timeout = timeout * 100000;
1097        else
1098                edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
1099
1100        return 0;
1101}
1102