linux/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2// Copyright (c) 2019 Mellanox Technologies.
   3
   4#include "health.h"
   5#include "lib/eq.h"
   6#include "lib/mlx5.h"
   7
   8int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
   9{
  10        int err;
  11
  12        err = devlink_fmsg_pair_nest_start(fmsg, name);
  13        if (err)
  14                return err;
  15
  16        err = devlink_fmsg_obj_nest_start(fmsg);
  17        if (err)
  18                return err;
  19
  20        return 0;
  21}
  22
  23int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
  24{
  25        int err;
  26
  27        err = devlink_fmsg_obj_nest_end(fmsg);
  28        if (err)
  29                return err;
  30
  31        err = devlink_fmsg_pair_nest_end(fmsg);
  32        if (err)
  33                return err;
  34
  35        return 0;
  36}
  37
  38int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
  39{
  40        struct mlx5e_priv *priv = cq->channel->priv;
  41        u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
  42        u8 hw_status;
  43        void *cqc;
  44        int err;
  45
  46        err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out);
  47        if (err)
  48                return err;
  49
  50        cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
  51        hw_status = MLX5_GET(cqc, cqc, status);
  52
  53        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
  54        if (err)
  55                return err;
  56
  57        err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
  58        if (err)
  59                return err;
  60
  61        err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
  62        if (err)
  63                return err;
  64
  65        err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
  66        if (err)
  67                return err;
  68
  69        err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
  70        if (err)
  71                return err;
  72
  73        err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
  74        if (err)
  75                return err;
  76
  77        return 0;
  78}
  79
  80int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
  81{
  82        u8 cq_log_stride;
  83        u32 cq_sz;
  84        int err;
  85
  86        cq_sz = mlx5_cqwq_get_size(&cq->wq);
  87        cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
  88
  89        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
  90        if (err)
  91                return err;
  92
  93        err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
  94        if (err)
  95                return err;
  96
  97        err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
  98        if (err)
  99                return err;
 100
 101        err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 102        if (err)
 103                return err;
 104
 105        return 0;
 106}
 107
 108int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
 109{
 110        int err;
 111
 112        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
 113        if (err)
 114                return err;
 115
 116        err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
 117        if (err)
 118                return err;
 119
 120        err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
 121        if (err)
 122                return err;
 123
 124        err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
 125        if (err)
 126                return err;
 127
 128        err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
 129        if (err)
 130                return err;
 131
 132        err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent);
 133        if (err)
 134                return err;
 135
 136        return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 137}
 138
 139void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
 140{
 141        mlx5e_reporter_tx_create(priv);
 142        mlx5e_reporter_rx_create(priv);
 143}
 144
 145void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
 146{
 147        mlx5e_reporter_rx_destroy(priv);
 148        mlx5e_reporter_tx_destroy(priv);
 149}
 150
 151void mlx5e_health_channels_update(struct mlx5e_priv *priv)
 152{
 153        if (priv->tx_reporter)
 154                devlink_health_reporter_state_update(priv->tx_reporter,
 155                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 156        if (priv->rx_reporter)
 157                devlink_health_reporter_state_update(priv->rx_reporter,
 158                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 159}
 160
 161int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
 162{
 163        struct mlx5_core_dev *mdev = channel->mdev;
 164        struct net_device *dev = channel->netdev;
 165        struct mlx5e_modify_sq_param msp = {};
 166        int err;
 167
 168        msp.curr_state = MLX5_SQC_STATE_ERR;
 169        msp.next_state = MLX5_SQC_STATE_RST;
 170
 171        err = mlx5e_modify_sq(mdev, sqn, &msp);
 172        if (err) {
 173                netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
 174                return err;
 175        }
 176
 177        memset(&msp, 0, sizeof(msp));
 178        msp.curr_state = MLX5_SQC_STATE_RST;
 179        msp.next_state = MLX5_SQC_STATE_RDY;
 180
 181        err = mlx5e_modify_sq(mdev, sqn, &msp);
 182        if (err) {
 183                netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
 184                return err;
 185        }
 186
 187        return 0;
 188}
 189
 190int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
 191{
 192        int err = 0;
 193
 194        rtnl_lock();
 195        mutex_lock(&priv->state_lock);
 196
 197        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 198                goto out;
 199
 200        err = mlx5e_safe_reopen_channels(priv);
 201
 202out:
 203        mutex_unlock(&priv->state_lock);
 204        rtnl_unlock();
 205
 206        return err;
 207}
 208
 209int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
 210{
 211        u32 eqe_count;
 212
 213        netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
 214                   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
 215
 216        eqe_count = mlx5_eq_poll_irq_disabled(eq);
 217        if (!eqe_count)
 218                return -EIO;
 219
 220        netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
 221                   eqe_count, eq->core.eqn);
 222
 223        channel->stats->eq_rearm++;
 224        return 0;
 225}
 226
 227int mlx5e_health_report(struct mlx5e_priv *priv,
 228                        struct devlink_health_reporter *reporter, char *err_str,
 229                        struct mlx5e_err_ctx *err_ctx)
 230{
 231        netdev_err(priv->netdev, "%s\n", err_str);
 232
 233        if (!reporter)
 234                return err_ctx->recover(err_ctx->ctx);
 235
 236        return devlink_health_report(reporter, err_str, err_ctx);
 237}
 238
 239#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
 240static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
 241                                        const void *value, u32 value_len)
 242
 243{
 244        u32 data_size;
 245        u32 offset;
 246        int err;
 247
 248        for (offset = 0; offset < value_len; offset += data_size) {
 249                data_size = value_len - offset;
 250                if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
 251                        data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
 252                err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
 253                if (err)
 254                        break;
 255        }
 256        return err;
 257}
 258
 259int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
 260                               struct devlink_fmsg *fmsg)
 261{
 262        struct mlx5_core_dev *mdev = priv->mdev;
 263        struct mlx5_rsc_dump_cmd *cmd;
 264        struct page *page;
 265        int cmd_err, err;
 266        int end_err;
 267        int size;
 268
 269        if (IS_ERR_OR_NULL(mdev->rsc_dump))
 270                return -EOPNOTSUPP;
 271
 272        page = alloc_page(GFP_KERNEL);
 273        if (!page)
 274                return -ENOMEM;
 275
 276        err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
 277        if (err)
 278                return err;
 279
 280        cmd = mlx5_rsc_dump_cmd_create(mdev, key);
 281        if (IS_ERR(cmd)) {
 282                err = PTR_ERR(cmd);
 283                goto free_page;
 284        }
 285
 286        do {
 287                cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
 288                if (cmd_err < 0) {
 289                        err = cmd_err;
 290                        goto destroy_cmd;
 291                }
 292
 293                err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
 294                if (err)
 295                        goto destroy_cmd;
 296
 297        } while (cmd_err > 0);
 298
 299destroy_cmd:
 300        mlx5_rsc_dump_cmd_destroy(cmd);
 301        end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
 302        if (end_err)
 303                err = end_err;
 304free_page:
 305        __free_page(page);
 306        return err;
 307}
 308
 309int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
 310                            int queue_idx, char *lbl)
 311{
 312        struct mlx5_rsc_key key = {};
 313        int err;
 314
 315        key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
 316        key.index1 = queue_idx;
 317        key.size = PAGE_SIZE;
 318        key.num_of_obj1 = 1;
 319
 320        err = devlink_fmsg_obj_nest_start(fmsg);
 321        if (err)
 322                return err;
 323
 324        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
 325        if (err)
 326                return err;
 327
 328        err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
 329        if (err)
 330                return err;
 331
 332        err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
 333        if (err)
 334                return err;
 335
 336        err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 337        if (err)
 338                return err;
 339
 340        return devlink_fmsg_obj_nest_end(fmsg);
 341}
 342