linux/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <linux/module.h>
  34#include <linux/etherdevice.h>
  35#include <linux/mlx5/driver.h>
  36
  37#include "mlx5_core.h"
  38#include "lib/mlx5.h"
  39#include "lib/eq.h"
  40#include "fpga/core.h"
  41#include "fpga/conn.h"
  42
  43static const char *const mlx5_fpga_error_strings[] = {
  44        "Null Syndrome",
  45        "Corrupted DDR",
  46        "Flash Timeout",
  47        "Internal Link Error",
  48        "Watchdog HW Failure",
  49        "I2C Failure",
  50        "Image Changed",
  51        "Temperature Critical",
  52};
  53
  54static const char * const mlx5_fpga_qp_error_strings[] = {
  55        "Null Syndrome",
  56        "Retry Counter Expired",
  57        "RNR Expired",
  58};
  59static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
  60{
  61        struct mlx5_fpga_device *fdev = NULL;
  62
  63        fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
  64        if (!fdev)
  65                return NULL;
  66
  67        spin_lock_init(&fdev->state_lock);
  68        fdev->state = MLX5_FPGA_STATUS_NONE;
  69        return fdev;
  70}
  71
  72static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
  73{
  74        switch (image) {
  75        case MLX5_FPGA_IMAGE_USER:
  76                return "user";
  77        case MLX5_FPGA_IMAGE_FACTORY:
  78                return "factory";
  79        default:
  80                return "unknown";
  81        }
  82}
  83
  84static const char *mlx5_fpga_device_name(u32 device)
  85{
  86        switch (device) {
  87        case MLX5_FPGA_DEVICE_KU040:
  88                return "ku040";
  89        case MLX5_FPGA_DEVICE_KU060:
  90                return "ku060";
  91        case MLX5_FPGA_DEVICE_KU060_2:
  92                return "ku060_2";
  93        case MLX5_FPGA_DEVICE_UNKNOWN:
  94        default:
  95                return "unknown";
  96        }
  97}
  98
  99static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 100{
 101        struct mlx5_fpga_query query;
 102        int err;
 103
 104        err = mlx5_fpga_query(fdev->mdev, &query);
 105        if (err) {
 106                mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
 107                return err;
 108        }
 109
 110        fdev->last_admin_image = query.admin_image;
 111        fdev->last_oper_image = query.oper_image;
 112
 113        mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
 114                      query.status, query.admin_image, query.oper_image);
 115
 116        if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
 117                mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
 118                              mlx5_fpga_image_name(fdev->last_oper_image),
 119                              query.status);
 120                return -EIO;
 121        }
 122
 123        return 0;
 124}
 125
 126static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
 127{
 128        int err;
 129        struct mlx5_core_dev *mdev = fdev->mdev;
 130
 131        err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
 132        if (err) {
 133                mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
 134                return err;
 135        }
 136        err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
 137        if (err) {
 138                mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
 139                return err;
 140        }
 141        err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
 142        if (err) {
 143                mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
 144                return err;
 145        }
 146        return 0;
 147}
 148
 149static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
 150
 151static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
 152{
 153        struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
 154
 155        return mlx5_fpga_event(fdev, event, eqe);
 156}
 157
 158static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
 159{
 160        struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
 161
 162        return mlx5_fpga_event(fdev, event, eqe);
 163}
 164
 165int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 166{
 167        struct mlx5_fpga_device *fdev = mdev->fpga;
 168        unsigned int max_num_qps;
 169        unsigned long flags;
 170        u32 fpga_device_id;
 171        int err;
 172
 173        if (!fdev)
 174                return 0;
 175
 176        err = mlx5_fpga_device_load_check(fdev);
 177        if (err)
 178                goto out;
 179
 180        err = mlx5_fpga_caps(fdev->mdev);
 181        if (err)
 182                goto out;
 183
 184        fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
 185        mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
 186                       mlx5_fpga_device_name(fpga_device_id),
 187                       fpga_device_id,
 188                       mlx5_fpga_image_name(fdev->last_oper_image),
 189                       MLX5_CAP_FPGA(fdev->mdev, image_version),
 190                       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
 191                       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
 192                       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
 193
 194        max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
 195        if (!max_num_qps) {
 196                mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
 197                err = -ENOTSUPP;
 198                goto out;
 199        }
 200
 201        err = mlx5_core_reserve_gids(mdev, max_num_qps);
 202        if (err)
 203                goto out;
 204
 205        MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
 206        MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
 207        mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
 208        mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
 209
 210        err = mlx5_fpga_conn_device_init(fdev);
 211        if (err)
 212                goto err_rsvd_gid;
 213
 214        if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
 215                err = mlx5_fpga_device_brb(fdev);
 216                if (err)
 217                        goto err_conn_init;
 218        }
 219
 220        goto out;
 221
 222err_conn_init:
 223        mlx5_fpga_conn_device_cleanup(fdev);
 224
 225err_rsvd_gid:
 226        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
 227        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
 228        mlx5_core_unreserve_gids(mdev, max_num_qps);
 229out:
 230        spin_lock_irqsave(&fdev->state_lock, flags);
 231        fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
 232        spin_unlock_irqrestore(&fdev->state_lock, flags);
 233        return err;
 234}
 235
 236int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 237{
 238        struct mlx5_fpga_device *fdev = NULL;
 239
 240        if (!MLX5_CAP_GEN(mdev, fpga)) {
 241                mlx5_core_dbg(mdev, "FPGA capability not present\n");
 242                return 0;
 243        }
 244
 245        mlx5_core_dbg(mdev, "Initializing FPGA\n");
 246
 247        fdev = mlx5_fpga_device_alloc();
 248        if (!fdev)
 249                return -ENOMEM;
 250
 251        fdev->mdev = mdev;
 252        mdev->fpga = fdev;
 253
 254        return 0;
 255}
 256
 257void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 258{
 259        struct mlx5_fpga_device *fdev = mdev->fpga;
 260        unsigned int max_num_qps;
 261        unsigned long flags;
 262        int err;
 263
 264        if (!fdev)
 265                return;
 266
 267        spin_lock_irqsave(&fdev->state_lock, flags);
 268        if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
 269                spin_unlock_irqrestore(&fdev->state_lock, flags);
 270                return;
 271        }
 272        fdev->state = MLX5_FPGA_STATUS_NONE;
 273        spin_unlock_irqrestore(&fdev->state_lock, flags);
 274
 275        if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
 276                err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
 277                if (err)
 278                        mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
 279                                      err);
 280        }
 281
 282        mlx5_fpga_conn_device_cleanup(fdev);
 283        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
 284        mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
 285
 286        max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
 287        mlx5_core_unreserve_gids(mdev, max_num_qps);
 288}
 289
 290void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
 291{
 292        struct mlx5_fpga_device *fdev = mdev->fpga;
 293
 294        mlx5_fpga_device_stop(mdev);
 295        kfree(fdev);
 296        mdev->fpga = NULL;
 297}
 298
 299static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
 300{
 301        if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
 302                return mlx5_fpga_error_strings[syndrome];
 303        return "Unknown";
 304}
 305
 306static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
 307{
 308        if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
 309                return mlx5_fpga_qp_error_strings[syndrome];
 310        return "Unknown";
 311}
 312
 313static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
 314                           unsigned long event, void *eqe)
 315{
 316        void *data = ((struct mlx5_eqe *)eqe)->data.raw;
 317        const char *event_name;
 318        bool teardown = false;
 319        unsigned long flags;
 320        u32 fpga_qpn;
 321        u8 syndrome;
 322
 323        switch (event) {
 324        case MLX5_EVENT_TYPE_FPGA_ERROR:
 325                syndrome = MLX5_GET(fpga_error_event, data, syndrome);
 326                event_name = mlx5_fpga_syndrome_to_string(syndrome);
 327                break;
 328        case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
 329                syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
 330                event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
 331                fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
 332                break;
 333        default:
 334                return NOTIFY_DONE;
 335        }
 336
 337        spin_lock_irqsave(&fdev->state_lock, flags);
 338        switch (fdev->state) {
 339        case MLX5_FPGA_STATUS_SUCCESS:
 340                mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
 341                teardown = true;
 342                break;
 343        default:
 344                mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
 345                                           syndrome, event_name);
 346        }
 347        spin_unlock_irqrestore(&fdev->state_lock, flags);
 348        /* We tear-down the card's interfaces and functionality because
 349         * the FPGA bump-on-the-wire is misbehaving and we lose ability
 350         * to communicate with the network. User may still be able to
 351         * recover by re-programming or debugging the FPGA
 352         */
 353        if (teardown)
 354                mlx5_trigger_health_work(fdev->mdev);
 355
 356        return NOTIFY_OK;
 357}
 358