linux/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2// Copyright (c) 2019 Mellanox Technologies.
   3
   4#include "health.h"
   5#include "lib/eq.h"
   6
   7int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
   8{
   9        int err;
  10
  11        err = devlink_fmsg_pair_nest_start(fmsg, name);
  12        if (err)
  13                return err;
  14
  15        err = devlink_fmsg_obj_nest_start(fmsg);
  16        if (err)
  17                return err;
  18
  19        return 0;
  20}
  21
  22int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
  23{
  24        int err;
  25
  26        err = devlink_fmsg_obj_nest_end(fmsg);
  27        if (err)
  28                return err;
  29
  30        err = devlink_fmsg_pair_nest_end(fmsg);
  31        if (err)
  32                return err;
  33
  34        return 0;
  35}
  36
  37int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
  38{
  39        struct mlx5e_priv *priv = cq->channel->priv;
  40        u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
  41        u8 hw_status;
  42        void *cqc;
  43        int err;
  44
  45        err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
  46        if (err)
  47                return err;
  48
  49        cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
  50        hw_status = MLX5_GET(cqc, cqc, status);
  51
  52        err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
  53        if (err)
  54                return err;
  55
  56        err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
  57        if (err)
  58                return err;
  59
  60        err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
  61        if (err)
  62                return err;
  63
  64        err = mlx5e_reporter_named_obj_nest_end(fmsg);
  65        if (err)
  66                return err;
  67
  68        return 0;
  69}
  70
  71int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
  72{
  73        u8 cq_log_stride;
  74        u32 cq_sz;
  75        int err;
  76
  77        cq_sz = mlx5_cqwq_get_size(&cq->wq);
  78        cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
  79
  80        err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
  81        if (err)
  82                return err;
  83
  84        err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
  85        if (err)
  86                return err;
  87
  88        err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
  89        if (err)
  90                return err;
  91
  92        err = mlx5e_reporter_named_obj_nest_end(fmsg);
  93        if (err)
  94                return err;
  95
  96        return 0;
  97}
  98
  99int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
 100{
 101        int err;
 102
 103        err = mlx5e_reporter_tx_create(priv);
 104        if (err)
 105                return err;
 106
 107        err = mlx5e_reporter_rx_create(priv);
 108        if (err)
 109                return err;
 110
 111        return 0;
 112}
 113
 114void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
 115{
 116        mlx5e_reporter_rx_destroy(priv);
 117        mlx5e_reporter_tx_destroy(priv);
 118}
 119
 120void mlx5e_health_channels_update(struct mlx5e_priv *priv)
 121{
 122        if (priv->tx_reporter)
 123                devlink_health_reporter_state_update(priv->tx_reporter,
 124                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 125        if (priv->rx_reporter)
 126                devlink_health_reporter_state_update(priv->rx_reporter,
 127                                                     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 128}
 129
 130int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
 131{
 132        struct mlx5_core_dev *mdev = channel->mdev;
 133        struct net_device *dev = channel->netdev;
 134        struct mlx5e_modify_sq_param msp = {};
 135        int err;
 136
 137        msp.curr_state = MLX5_SQC_STATE_ERR;
 138        msp.next_state = MLX5_SQC_STATE_RST;
 139
 140        err = mlx5e_modify_sq(mdev, sqn, &msp);
 141        if (err) {
 142                netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
 143                return err;
 144        }
 145
 146        memset(&msp, 0, sizeof(msp));
 147        msp.curr_state = MLX5_SQC_STATE_RST;
 148        msp.next_state = MLX5_SQC_STATE_RDY;
 149
 150        err = mlx5e_modify_sq(mdev, sqn, &msp);
 151        if (err) {
 152                netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
 153                return err;
 154        }
 155
 156        return 0;
 157}
 158
 159int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
 160{
 161        int err = 0;
 162
 163        rtnl_lock();
 164        mutex_lock(&priv->state_lock);
 165
 166        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 167                goto out;
 168
 169        err = mlx5e_safe_reopen_channels(priv);
 170
 171out:
 172        mutex_unlock(&priv->state_lock);
 173        rtnl_unlock();
 174
 175        return err;
 176}
 177
 178int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
 179{
 180        u32 eqe_count;
 181
 182        netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
 183                   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
 184
 185        eqe_count = mlx5_eq_poll_irq_disabled(eq);
 186        if (!eqe_count)
 187                return -EIO;
 188
 189        netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
 190                   eqe_count, eq->core.eqn);
 191
 192        channel->stats->eq_rearm++;
 193        return 0;
 194}
 195
 196int mlx5e_health_report(struct mlx5e_priv *priv,
 197                        struct devlink_health_reporter *reporter, char *err_str,
 198                        struct mlx5e_err_ctx *err_ctx)
 199{
 200        netdev_err(priv->netdev, err_str);
 201
 202        if (!reporter)
 203                return err_ctx->recover(err_ctx->ctx);
 204
 205        return devlink_health_report(reporter, err_str, err_ctx);
 206}
 207