linux/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2// Copyright (c) 2019 Mellanox Technologies.
   3
   4#include "health.h"
   5#include "lib/eq.h"
   6#include "lib/mlx5.h"
   7
   8int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
   9{
  10        int err;
  11
  12        err = devlink_fmsg_pair_nest_start(fmsg, name);
  13        if (err)
  14                return err;
  15
  16        err = devlink_fmsg_obj_nest_start(fmsg);
  17        if (err)
  18                return err;
  19
  20        return 0;
  21}
  22
  23int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
  24{
  25        int err;
  26
  27        err = devlink_fmsg_obj_nest_end(fmsg);
  28        if (err)
  29                return err;
  30
  31        err = devlink_fmsg_pair_nest_end(fmsg);
  32        if (err)
  33                return err;
  34
  35        return 0;
  36}
  37
  38int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
  39{
  40        u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
  41        u8 hw_status;
  42        void *cqc;
  43        int err;
  44
  45        err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
  46        if (err)
  47                return err;
  48
  49        cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
  50        hw_status = MLX5_GET(cqc, cqc, status);
  51
  52        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
  53        if (err)
  54                return err;
  55
  56        err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
  57        if (err)
  58                return err;
  59
  60        err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
  61        if (err)
  62                return err;
  63
  64        err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
  65        if (err)
  66                return err;
  67
  68        err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
  69        if (err)
  70                return err;
  71
  72        err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
  73        if (err)
  74                return err;
  75
  76        return 0;
  77}
  78
  79int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
  80{
  81        u8 cq_log_stride;
  82        u32 cq_sz;
  83        int err;
  84
  85        cq_sz = mlx5_cqwq_get_size(&cq->wq);
  86        cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
  87
  88        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
  89        if (err)
  90                return err;
  91
  92        err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
  93        if (err)
  94                return err;
  95
  96        err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
  97        if (err)
  98                return err;
  99
 100        err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 101        if (err)
 102                return err;
 103
 104        return 0;
 105}
 106
 107int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
 108{
 109        int err;
 110
 111        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
 112        if (err)
 113                return err;
 114
 115        err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
 116        if (err)
 117                return err;
 118
 119        err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
 120        if (err)
 121                return err;
 122
 123        err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
 124        if (err)
 125                return err;
 126
 127        err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
 128        if (err)
 129                return err;
 130
 131        err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
 132        if (err)
 133                return err;
 134
 135        return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 136}
 137
 138void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
 139{
 140        mlx5e_reporter_tx_create(priv);
 141        mlx5e_reporter_rx_create(priv);
 142}
 143
 144void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
 145{
 146        mlx5e_reporter_rx_destroy(priv);
 147        mlx5e_reporter_tx_destroy(priv);
 148}
 149
 150void mlx5e_health_channels_update(struct mlx5e_priv *priv)
 151{
 152        if (priv->tx_reporter)
 153                devlink_health_reporter_state_update(priv->tx_reporter,
 154                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 155        if (priv->rx_reporter)
 156                devlink_health_reporter_state_update(priv->rx_reporter,
 157                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 158}
 159
 160int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
 161{
 162        struct mlx5e_modify_sq_param msp = {};
 163        int err;
 164
 165        msp.curr_state = MLX5_SQC_STATE_ERR;
 166        msp.next_state = MLX5_SQC_STATE_RST;
 167
 168        err = mlx5e_modify_sq(mdev, sqn, &msp);
 169        if (err) {
 170                netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
 171                return err;
 172        }
 173
 174        memset(&msp, 0, sizeof(msp));
 175        msp.curr_state = MLX5_SQC_STATE_RST;
 176        msp.next_state = MLX5_SQC_STATE_RDY;
 177
 178        err = mlx5e_modify_sq(mdev, sqn, &msp);
 179        if (err) {
 180                netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
 181                return err;
 182        }
 183
 184        return 0;
 185}
 186
 187int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
 188{
 189        int err = 0;
 190
 191        rtnl_lock();
 192        mutex_lock(&priv->state_lock);
 193
 194        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 195                goto out;
 196
 197        err = mlx5e_safe_reopen_channels(priv);
 198
 199out:
 200        mutex_unlock(&priv->state_lock);
 201        rtnl_unlock();
 202
 203        return err;
 204}
 205
 206int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
 207                                    struct mlx5e_ch_stats *stats)
 208{
 209        u32 eqe_count;
 210
 211        netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
 212                   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
 213
 214        eqe_count = mlx5_eq_poll_irq_disabled(eq);
 215        if (!eqe_count)
 216                return -EIO;
 217
 218        netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
 219                   eqe_count, eq->core.eqn);
 220
 221        stats->eq_rearm++;
 222        return 0;
 223}
 224
 225int mlx5e_health_report(struct mlx5e_priv *priv,
 226                        struct devlink_health_reporter *reporter, char *err_str,
 227                        struct mlx5e_err_ctx *err_ctx)
 228{
 229        netdev_err(priv->netdev, "%s\n", err_str);
 230
 231        if (!reporter)
 232                return err_ctx->recover(err_ctx->ctx);
 233
 234        return devlink_health_report(reporter, err_str, err_ctx);
 235}
 236
 237#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
 238static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
 239                                        const void *value, u32 value_len)
 240
 241{
 242        u32 data_size;
 243        int err = 0;
 244        u32 offset;
 245
 246        for (offset = 0; offset < value_len; offset += data_size) {
 247                data_size = value_len - offset;
 248                if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
 249                        data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
 250                err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
 251                if (err)
 252                        break;
 253        }
 254        return err;
 255}
 256
 257int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
 258                               struct devlink_fmsg *fmsg)
 259{
 260        struct mlx5_core_dev *mdev = priv->mdev;
 261        struct mlx5_rsc_dump_cmd *cmd;
 262        struct page *page;
 263        int cmd_err, err;
 264        int end_err;
 265        int size;
 266
 267        if (IS_ERR_OR_NULL(mdev->rsc_dump))
 268                return -EOPNOTSUPP;
 269
 270        page = alloc_page(GFP_KERNEL);
 271        if (!page)
 272                return -ENOMEM;
 273
 274        err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
 275        if (err)
 276                goto free_page;
 277
 278        cmd = mlx5_rsc_dump_cmd_create(mdev, key);
 279        if (IS_ERR(cmd)) {
 280                err = PTR_ERR(cmd);
 281                goto free_page;
 282        }
 283
 284        do {
 285                cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
 286                if (cmd_err < 0) {
 287                        err = cmd_err;
 288                        goto destroy_cmd;
 289                }
 290
 291                err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
 292                if (err)
 293                        goto destroy_cmd;
 294
 295        } while (cmd_err > 0);
 296
 297destroy_cmd:
 298        mlx5_rsc_dump_cmd_destroy(cmd);
 299        end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
 300        if (end_err)
 301                err = end_err;
 302free_page:
 303        __free_page(page);
 304        return err;
 305}
 306
 307int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
 308                            int queue_idx, char *lbl)
 309{
 310        struct mlx5_rsc_key key = {};
 311        int err;
 312
 313        key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
 314        key.index1 = queue_idx;
 315        key.size = PAGE_SIZE;
 316        key.num_of_obj1 = 1;
 317
 318        err = devlink_fmsg_obj_nest_start(fmsg);
 319        if (err)
 320                return err;
 321
 322        err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
 323        if (err)
 324                return err;
 325
 326        err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
 327        if (err)
 328                return err;
 329
 330        err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
 331        if (err)
 332                return err;
 333
 334        err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 335        if (err)
 336                return err;
 337
 338        return devlink_fmsg_obj_nest_end(fmsg);
 339}
 340