linux/drivers/net/ethernet/mellanox/mlx5/core/events.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2// Copyright (c) 2018 Mellanox Technologies
   3
   4#include <linux/mlx5/driver.h>
   5
   6#include "mlx5_core.h"
   7#include "lib/eq.h"
   8#include "lib/mlx5.h"
   9
  10struct mlx5_event_nb {
  11        struct mlx5_nb  nb;
  12        void           *ctx;
  13};
  14
  15/* General events handlers for the low level mlx5_core driver
  16 *
  17 * Other Major feature specific events such as
  18 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
  19 * separate notifiers callbacks, specifically by those mlx5 components.
  20 */
  21static int any_notifier(struct notifier_block *, unsigned long, void *);
  22static int temp_warn(struct notifier_block *, unsigned long, void *);
  23static int port_module(struct notifier_block *, unsigned long, void *);
  24static int pcie_core(struct notifier_block *, unsigned long, void *);
  25
  26/* handler which forwards the event to events->fw_nh, driver notifiers */
  27static int forward_event(struct notifier_block *, unsigned long, void *);
  28
  29static struct mlx5_nb events_nbs_ref[] = {
  30        /* Events to be processed by mlx5_core */
  31        {.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
  32        {.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
  33        {.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
  34        {.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
  35
  36        /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
  37        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
  38        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
  39        /* QP/WQ resource events to forward */
  40        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
  41        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
  42        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
  43        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
  44        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
  45        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
  46        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
  47        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
  48        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
  49        /* SRQ events */
  50        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
  51        {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
  52};
  53
  54struct mlx5_events {
  55        struct mlx5_core_dev *dev;
  56        struct workqueue_struct *wq;
  57        struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
  58        /* driver notifier chain for fw events */
  59        struct atomic_notifier_head fw_nh;
  60        /* port module events stats */
  61        struct mlx5_pme_stats pme_stats;
  62        /*pcie_core*/
  63        struct work_struct pcie_core_work;
  64        /* driver notifier chain for sw events */
  65        struct blocking_notifier_head sw_nh;
  66};
  67
  68static const char *eqe_type_str(u8 type)
  69{
  70        switch (type) {
  71        case MLX5_EVENT_TYPE_COMP:
  72                return "MLX5_EVENT_TYPE_COMP";
  73        case MLX5_EVENT_TYPE_PATH_MIG:
  74                return "MLX5_EVENT_TYPE_PATH_MIG";
  75        case MLX5_EVENT_TYPE_COMM_EST:
  76                return "MLX5_EVENT_TYPE_COMM_EST";
  77        case MLX5_EVENT_TYPE_SQ_DRAINED:
  78                return "MLX5_EVENT_TYPE_SQ_DRAINED";
  79        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
  80                return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
  81        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
  82                return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
  83        case MLX5_EVENT_TYPE_CQ_ERROR:
  84                return "MLX5_EVENT_TYPE_CQ_ERROR";
  85        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
  86                return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
  87        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
  88                return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
  89        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
  90                return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
  91        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
  92                return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
  93        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
  94                return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
  95        case MLX5_EVENT_TYPE_INTERNAL_ERROR:
  96                return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
  97        case MLX5_EVENT_TYPE_PORT_CHANGE:
  98                return "MLX5_EVENT_TYPE_PORT_CHANGE";
  99        case MLX5_EVENT_TYPE_GPIO_EVENT:
 100                return "MLX5_EVENT_TYPE_GPIO_EVENT";
 101        case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
 102                return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
 103        case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
 104                return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
 105        case MLX5_EVENT_TYPE_REMOTE_CONFIG:
 106                return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
 107        case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
 108                return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
 109        case MLX5_EVENT_TYPE_STALL_EVENT:
 110                return "MLX5_EVENT_TYPE_STALL_EVENT";
 111        case MLX5_EVENT_TYPE_CMD:
 112                return "MLX5_EVENT_TYPE_CMD";
 113        case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
 114                return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
 115        case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
 116                return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
 117        case MLX5_EVENT_TYPE_PAGE_REQUEST:
 118                return "MLX5_EVENT_TYPE_PAGE_REQUEST";
 119        case MLX5_EVENT_TYPE_PAGE_FAULT:
 120                return "MLX5_EVENT_TYPE_PAGE_FAULT";
 121        case MLX5_EVENT_TYPE_PPS_EVENT:
 122                return "MLX5_EVENT_TYPE_PPS_EVENT";
 123        case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
 124                return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
 125        case MLX5_EVENT_TYPE_FPGA_ERROR:
 126                return "MLX5_EVENT_TYPE_FPGA_ERROR";
 127        case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
 128                return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
 129        case MLX5_EVENT_TYPE_GENERAL_EVENT:
 130                return "MLX5_EVENT_TYPE_GENERAL_EVENT";
 131        case MLX5_EVENT_TYPE_MONITOR_COUNTER:
 132                return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
 133        case MLX5_EVENT_TYPE_DEVICE_TRACER:
 134                return "MLX5_EVENT_TYPE_DEVICE_TRACER";
 135        default:
 136                return "Unrecognized event";
 137        }
 138}
 139
 140/* handles all FW events, type == eqe->type */
 141static int any_notifier(struct notifier_block *nb,
 142                        unsigned long type, void *data)
 143{
 144        struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 145        struct mlx5_events   *events   = event_nb->ctx;
 146        struct mlx5_eqe      *eqe      = data;
 147
 148        mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
 149                      eqe_type_str(eqe->type), eqe->sub_type);
 150        return NOTIFY_OK;
 151}
 152
 153/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
 154static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
 155{
 156        struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 157        struct mlx5_events   *events   = event_nb->ctx;
 158        struct mlx5_eqe      *eqe      = data;
 159        u64 value_lsb;
 160        u64 value_msb;
 161
 162        value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
 163        value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
 164
 165        mlx5_core_warn(events->dev,
 166                       "High temperature on sensors with bit set %llx %llx",
 167                       value_msb, value_lsb);
 168
 169        return NOTIFY_OK;
 170}
 171
 172/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
 173static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
 174{
 175        switch (status) {
 176        case MLX5_MODULE_STATUS_PLUGGED:
 177                return "Cable plugged";
 178        case MLX5_MODULE_STATUS_UNPLUGGED:
 179                return "Cable unplugged";
 180        case MLX5_MODULE_STATUS_ERROR:
 181                return "Cable error";
 182        case MLX5_MODULE_STATUS_DISABLED:
 183                return "Cable disabled";
 184        default:
 185                return "Unknown status";
 186        }
 187}
 188
 189static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
 190{
 191        switch (error) {
 192        case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
 193                return "Power budget exceeded";
 194        case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
 195                return "Long Range for non MLNX cable";
 196        case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
 197                return "Bus stuck (I2C or data shorted)";
 198        case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
 199                return "No EEPROM/retry timeout";
 200        case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
 201                return "Enforce part number list";
 202        case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
 203                return "Unknown identifier";
 204        case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
 205                return "High Temperature";
 206        case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
 207                return "Bad or shorted cable/module";
 208        case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
 209                return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
 210        default:
 211                return "Unknown error";
 212        }
 213}
 214
 215/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
 216static int port_module(struct notifier_block *nb, unsigned long type, void *data)
 217{
 218        struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 219        struct mlx5_events   *events   = event_nb->ctx;
 220        struct mlx5_eqe      *eqe      = data;
 221
 222        enum port_module_event_status_type module_status;
 223        enum port_module_event_error_type error_type;
 224        struct mlx5_eqe_port_module *module_event_eqe;
 225        const char *status_str;
 226        u8 module_num;
 227
 228        module_event_eqe = &eqe->data.port_module;
 229        module_status = module_event_eqe->module_status &
 230                        PORT_MODULE_EVENT_MODULE_STATUS_MASK;
 231        error_type = module_event_eqe->error_type &
 232                     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
 233
 234        if (module_status < MLX5_MODULE_STATUS_NUM)
 235                events->pme_stats.status_counters[module_status]++;
 236
 237        if (module_status == MLX5_MODULE_STATUS_ERROR)
 238                if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
 239                        events->pme_stats.error_counters[error_type]++;
 240
 241        if (!printk_ratelimit())
 242                return NOTIFY_OK;
 243
 244        module_num = module_event_eqe->module;
 245        status_str = mlx5_pme_status_to_string(module_status);
 246        if (module_status == MLX5_MODULE_STATUS_ERROR) {
 247                const char *error_str = mlx5_pme_error_to_string(error_type);
 248
 249                mlx5_core_err(events->dev,
 250                              "Port module event[error]: module %u, %s, %s\n",
 251                              module_num, status_str, error_str);
 252        } else {
 253                mlx5_core_info(events->dev,
 254                               "Port module event: module %u, %s\n",
 255                               module_num, status_str);
 256        }
 257
 258        return NOTIFY_OK;
 259}
 260
 261enum {
 262        MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
 263        MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
 264        MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
 265};
 266
 267static void mlx5_pcie_event(struct work_struct *work)
 268{
 269        u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
 270        u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
 271        struct mlx5_events *events;
 272        struct mlx5_core_dev *dev;
 273        u8 power_status;
 274        u16 pci_power;
 275
 276        events = container_of(work, struct mlx5_events, pcie_core_work);
 277        dev  = events->dev;
 278
 279        if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
 280                return;
 281
 282        mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
 283                             MLX5_REG_MPEIN, 0, 0);
 284        power_status = MLX5_GET(mpein_reg, out, pwr_status);
 285        pci_power = MLX5_GET(mpein_reg, out, pci_power);
 286
 287        switch (power_status) {
 288        case MLX5_PCI_POWER_COULD_NOT_BE_READ:
 289                mlx5_core_info_rl(dev,
 290                                  "PCIe slot power capability was not advertised.\n");
 291                break;
 292        case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
 293                mlx5_core_warn_rl(dev,
 294                                  "Detected insufficient power on the PCIe slot (%uW).\n",
 295                                  pci_power);
 296                break;
 297        case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
 298                mlx5_core_info_rl(dev,
 299                                  "PCIe slot advertised sufficient power (%uW).\n",
 300                                  pci_power);
 301                break;
 302        }
 303}
 304
 305static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
 306{
 307        struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
 308                                                        struct mlx5_event_nb,
 309                                                        nb);
 310        struct mlx5_events      *events   = event_nb->ctx;
 311        struct mlx5_eqe         *eqe      = data;
 312
 313        switch (eqe->sub_type) {
 314        case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
 315                        queue_work(events->wq, &events->pcie_core_work);
 316                break;
 317        default:
 318                return NOTIFY_DONE;
 319        }
 320
 321        return NOTIFY_OK;
 322}
 323
 324void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
 325{
 326        *stats = dev->priv.events->pme_stats;
 327}
 328
 329/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
 330static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
 331{
 332        struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
 333        struct mlx5_events   *events   = event_nb->ctx;
 334        struct mlx5_eqe      *eqe      = data;
 335
 336        mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
 337                      eqe_type_str(eqe->type), eqe->sub_type);
 338        atomic_notifier_call_chain(&events->fw_nh, event, data);
 339        return NOTIFY_OK;
 340}
 341
 342int mlx5_events_init(struct mlx5_core_dev *dev)
 343{
 344        struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
 345
 346        if (!events)
 347                return -ENOMEM;
 348
 349        ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
 350        events->dev = dev;
 351        dev->priv.events = events;
 352        events->wq = create_singlethread_workqueue("mlx5_events");
 353        if (!events->wq) {
 354                kfree(events);
 355                return -ENOMEM;
 356        }
 357        INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
 358        BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
 359
 360        return 0;
 361}
 362
 363void mlx5_events_cleanup(struct mlx5_core_dev *dev)
 364{
 365        destroy_workqueue(dev->priv.events->wq);
 366        kvfree(dev->priv.events);
 367}
 368
 369void mlx5_events_start(struct mlx5_core_dev *dev)
 370{
 371        struct mlx5_events *events = dev->priv.events;
 372        int i;
 373
 374        for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
 375                events->notifiers[i].nb  = events_nbs_ref[i];
 376                events->notifiers[i].ctx = events;
 377                mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
 378        }
 379}
 380
 381void mlx5_events_stop(struct mlx5_core_dev *dev)
 382{
 383        struct mlx5_events *events = dev->priv.events;
 384        int i;
 385
 386        for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
 387                mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
 388        flush_workqueue(events->wq);
 389}
 390
 391/* This API is used only for processing and forwarding firmware
 392 * events to mlx5 consumer.
 393 */
 394int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
 395{
 396        struct mlx5_events *events = dev->priv.events;
 397
 398        return atomic_notifier_chain_register(&events->fw_nh, nb);
 399}
 400EXPORT_SYMBOL(mlx5_notifier_register);
 401
 402int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
 403{
 404        struct mlx5_events *events = dev->priv.events;
 405
 406        return atomic_notifier_chain_unregister(&events->fw_nh, nb);
 407}
 408EXPORT_SYMBOL(mlx5_notifier_unregister);
 409
 410int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
 411{
 412        return atomic_notifier_call_chain(&events->fw_nh, event, data);
 413}
 414
 415/* This API is used only for processing and forwarding driver-specific
 416 * events to mlx5 consumers.
 417 */
 418int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
 419{
 420        struct mlx5_events *events = dev->priv.events;
 421
 422        return blocking_notifier_chain_register(&events->sw_nh, nb);
 423}
 424
 425int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
 426{
 427        struct mlx5_events *events = dev->priv.events;
 428
 429        return blocking_notifier_chain_unregister(&events->sw_nh, nb);
 430}
 431
 432int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
 433                                      void *data)
 434{
 435        struct mlx5_events *events = dev->priv.events;
 436
 437        return blocking_notifier_call_chain(&events->sw_nh, event, data);
 438}
 439
 440void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
 441{
 442        queue_work(dev->priv.events->wq, work);
 443}
 444