linux/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <linux/module.h>
  34#include <linux/etherdevice.h>
  35#include <linux/mlx5/driver.h>
  36
  37#include "mlx5_core.h"
  38#include "lib/mlx5.h"
  39#include "lib/eq.h"
  40#include "fpga/core.h"
  41#include "fpga/conn.h"
  42
  43static const char *const mlx5_fpga_error_strings[] = {
  44        "Null Syndrome",
  45        "Corrupted DDR",
  46        "Flash Timeout",
  47        "Internal Link Error",
  48        "Watchdog HW Failure",
  49        "I2C Failure",
  50        "Image Changed",
  51        "Temperature Critical",
  52};
  53
  54static const char * const mlx5_fpga_qp_error_strings[] = {
  55        "Null Syndrome",
  56        "Retry Counter Expired",
  57        "RNR Expired",
  58};
  59static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
  60{
  61        struct mlx5_fpga_device *fdev = NULL;
  62
  63        fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
  64        if (!fdev)
  65                return NULL;
  66
  67        spin_lock_init(&fdev->state_lock);
  68        fdev->state = MLX5_FPGA_STATUS_NONE;
  69        return fdev;
  70}
  71
  72static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
  73{
  74        switch (image) {
  75        case MLX5_FPGA_IMAGE_USER:
  76                return "user";
  77        case MLX5_FPGA_IMAGE_FACTORY:
  78                return "factory";
  79        default:
  80                return "unknown";
  81        }
  82}
  83
  84static const char *mlx5_fpga_name(u32 fpga_id)
  85{
  86        static char ret[32];
  87
  88        switch (fpga_id) {
  89        case MLX5_FPGA_NEWTON:
  90                return "Newton";
  91        case MLX5_FPGA_EDISON:
  92                return "Edison";
  93        case MLX5_FPGA_MORSE:
  94                return "Morse";
  95        case MLX5_FPGA_MORSEQ:
  96                return "MorseQ";
  97        }
  98
  99        snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
 100        return ret;
 101}
 102
 103static int mlx5_is_fpga_lookaside(u32 fpga_id)
 104{
 105        return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
 106}
 107
 108static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 109{
 110        struct mlx5_fpga_query query;
 111        int err;
 112
 113        err = mlx5_fpga_query(fdev->mdev, &query);
 114        if (err) {
 115                mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
 116                return err;
 117        }
 118
 119        fdev->last_admin_image = query.admin_image;
 120        fdev->last_oper_image = query.oper_image;
 121
 122        mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
 123                       query.status, query.admin_image, query.oper_image);
 124
 125        /* for FPGA lookaside projects FPGA load status is not important */
 126        if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
 127                return 0;
 128
 129        if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
 130                mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
 131                              mlx5_fpga_image_name(fdev->last_oper_image),
 132                              query.status);
 133                return -EIO;
 134        }
 135
 136        return 0;
 137}
 138
 139static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
 140{
 141        int err;
 142        struct mlx5_core_dev *mdev = fdev->mdev;
 143
 144        err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
 145        if (err) {
 146                mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
 147                return err;
 148        }
 149        err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
 150        if (err) {
 151                mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
 152                return err;
 153        }
 154        err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
 155        if (err) {
 156                mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
 157                return err;
 158        }
 159        return 0;
 160}
 161
 162static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
 163
 164static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
 165{
 166        struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
 167
 168        return mlx5_fpga_event(fdev, event, eqe);
 169}
 170
 171static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
 172{
 173        struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
 174
 175        return mlx5_fpga_event(fdev, event, eqe);
 176}
 177
 178int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 179{
 180        struct mlx5_fpga_device *fdev = mdev->fpga;
 181        unsigned int max_num_qps;
 182        unsigned long flags;
 183        u32 fpga_id;
 184        int err;
 185
 186        if (!fdev)
 187                return 0;
 188
 189        err = mlx5_fpga_caps(fdev->mdev);
 190        if (err)
 191                goto out;
 192
 193        err = mlx5_fpga_device_load_check(fdev);
 194        if (err)
 195                goto out;
 196
 197        fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
 198        mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
 199
 200        /* No QPs if FPGA does not participate in net processing */
 201        if (mlx5_is_fpga_lookaside(fpga_id))
 202                goto out;
 203
 204        mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
 205                       mlx5_fpga_image_name(fdev->last_oper_image),
 206                       fdev->last_oper_image,
 207                       MLX5_CAP_FPGA(fdev->mdev, image_version),
 208                       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
 209                       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
 210                       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
 211
 212        max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
 213        if (!max_num_qps) {
 214                mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
 215                err = -ENOTSUPP;
 216                goto out;
 217        }
 218
 219        err = mlx5_core_reserve_gids(mdev, max_num_qps);
 220        if (err)
 221                goto out;
 222
 223        MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
 224        MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
 225        mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
 226        mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
 227
 228        err = mlx5_fpga_conn_device_init(fdev);
 229        if (err)
 230                goto err_rsvd_gid;
 231
 232        if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
 233                err = mlx5_fpga_device_brb(fdev);
 234                if (err)
 235                        goto err_conn_init;
 236        }
 237
 238        goto out;
 239
 240err_conn_init:
 241        mlx5_fpga_conn_device_cleanup(fdev);
 242
 243err_rsvd_gid:
 244        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
 245        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
 246        mlx5_core_unreserve_gids(mdev, max_num_qps);
 247out:
 248        spin_lock_irqsave(&fdev->state_lock, flags);
 249        fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
 250        spin_unlock_irqrestore(&fdev->state_lock, flags);
 251        return err;
 252}
 253
 254int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 255{
 256        struct mlx5_fpga_device *fdev = NULL;
 257
 258        if (!MLX5_CAP_GEN(mdev, fpga)) {
 259                mlx5_core_dbg(mdev, "FPGA capability not present\n");
 260                return 0;
 261        }
 262
 263        mlx5_core_dbg(mdev, "Initializing FPGA\n");
 264
 265        fdev = mlx5_fpga_device_alloc();
 266        if (!fdev)
 267                return -ENOMEM;
 268
 269        fdev->mdev = mdev;
 270        mdev->fpga = fdev;
 271
 272        return 0;
 273}
 274
 275void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 276{
 277        struct mlx5_fpga_device *fdev = mdev->fpga;
 278        unsigned int max_num_qps;
 279        unsigned long flags;
 280        int err;
 281
 282        if (!fdev)
 283                return;
 284
 285        if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
 286                return;
 287
 288        spin_lock_irqsave(&fdev->state_lock, flags);
 289        if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
 290                spin_unlock_irqrestore(&fdev->state_lock, flags);
 291                return;
 292        }
 293        fdev->state = MLX5_FPGA_STATUS_NONE;
 294        spin_unlock_irqrestore(&fdev->state_lock, flags);
 295
 296        if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
 297                err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
 298                if (err)
 299                        mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
 300                                      err);
 301        }
 302
 303        mlx5_fpga_conn_device_cleanup(fdev);
 304        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
 305        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
 306
 307        max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
 308        mlx5_core_unreserve_gids(mdev, max_num_qps);
 309}
 310
 311void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
 312{
 313        struct mlx5_fpga_device *fdev = mdev->fpga;
 314
 315        mlx5_fpga_device_stop(mdev);
 316        kfree(fdev);
 317        mdev->fpga = NULL;
 318}
 319
 320static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
 321{
 322        if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
 323                return mlx5_fpga_error_strings[syndrome];
 324        return "Unknown";
 325}
 326
 327static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
 328{
 329        if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
 330                return mlx5_fpga_qp_error_strings[syndrome];
 331        return "Unknown";
 332}
 333
 334static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
 335                           unsigned long event, void *eqe)
 336{
 337        void *data = ((struct mlx5_eqe *)eqe)->data.raw;
 338        const char *event_name;
 339        bool teardown = false;
 340        unsigned long flags;
 341        u8 syndrome;
 342
 343        switch (event) {
 344        case MLX5_EVENT_TYPE_FPGA_ERROR:
 345                syndrome = MLX5_GET(fpga_error_event, data, syndrome);
 346                event_name = mlx5_fpga_syndrome_to_string(syndrome);
 347                break;
 348        case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
 349                syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
 350                event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
 351                break;
 352        default:
 353                return NOTIFY_DONE;
 354        }
 355
 356        spin_lock_irqsave(&fdev->state_lock, flags);
 357        switch (fdev->state) {
 358        case MLX5_FPGA_STATUS_SUCCESS:
 359                mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
 360                teardown = true;
 361                break;
 362        default:
 363                mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
 364                                           syndrome, event_name);
 365        }
 366        spin_unlock_irqrestore(&fdev->state_lock, flags);
 367        /* We tear-down the card's interfaces and functionality because
 368         * the FPGA bump-on-the-wire is misbehaving and we lose ability
 369         * to communicate with the network. User may still be able to
 370         * recover by re-programming or debugging the FPGA
 371         */
 372        if (teardown)
 373                mlx5_trigger_health_work(fdev->mdev);
 374
 375        return NOTIFY_OK;
 376}
 377