linux/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* The minimum BW share value supported by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
        min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
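
/* Worked example (illustrative numbers, not taken from hardware): with a
 * min_rate of 5000 Mbps, a divider of 50 and a firmware limit of 100,
 * MLX5_RATE_TO_BW_SHARE(5000, 50, 100) = min(max(DIV_ROUND_UP(5000, 50), 1), 100)
 * = min(max(100, 1), 100) = 100, i.e. the result is clamped to the
 * [MLX5_MIN_BW_SHARE, limit] range.
 */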

struct mlx5_esw_rate_group {
        u32 tsar_ix;
        u32 max_rate;
        u32 min_rate;
        u32 bw_share;
        struct list_head list;
};

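/* Common helper for the group and vport paths below: fill the parent,
 * max_average_bw and bw_share fields of a scheduling context and issue a
 * MODIFY command against the given TSAR element.
 */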
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
                               u32 parent_ix, u32 tsar_ix,
                               u32 max_rate, u32 bw_share)
{
        u32 bitmask = 0;

        if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
                return -EOPNOTSUPP;

        MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
        MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
        MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
        bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
        bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

        return mlx5_modify_scheduling_element_cmd(dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  sched_ctx,
                                                  tsar_ix,
                                                  bitmask);
}

static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
                                u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_core_dev *dev = esw->dev;
        int err;

        err = esw_qos_tsar_config(dev, sched_ctx,
                                  esw->qos.root_tsar_ix, group->tsar_ix,
                                  max_rate, bw_share);
        if (err)
                NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

        trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

        return err;
}

static int esw_qos_vport_config(struct mlx5_eswitch *esw,
                                struct mlx5_vport *vport,
                                u32 max_rate, u32 bw_share,
                                struct netlink_ext_ack *extack)
{
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_esw_rate_group *group = vport->qos.group;
        struct mlx5_core_dev *dev = esw->dev;
        u32 parent_tsar_ix;
        void *vport_elem;
        int err;

        if (!vport->qos.enabled)
                return -EIO;

        parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
        MLX5_SET(scheduling_context, sched_ctx, element_type,
                 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
        vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
                                  element_attributes);
        MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);

        err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
                                  max_rate, bw_share);
        if (err) {
                esw_warn(esw->dev,
                         "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
                         vport->vport, err);
                NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
                return err;
        }

        trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);

        return 0;
}

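/* Find the largest min_rate across all groups (group_level) or across the
 * vports of @group, and derive the divider used to turn min_rate values into
 * bw_share units. E.g. (illustrative numbers): if the largest guarantee is
 * 10000 Mbps and fw_max_bw_share is 100, the divider is 100, so a vport with
 * min_rate 5000 later gets bw_share 50.
 */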
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
                                              struct mlx5_esw_rate_group *group,
                                              bool group_level)
{
        u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
        struct mlx5_vport *evport;
        u32 max_guarantee = 0;
        unsigned long i;

        if (group_level) {
                struct mlx5_esw_rate_group *group;

                list_for_each_entry(group, &esw->qos.groups, list) {
                        if (group->min_rate < max_guarantee)
                                continue;
                        max_guarantee = group->min_rate;
                }
        } else {
                mlx5_esw_for_each_vport(esw, i, evport) {
                        if (!evport->enabled || !evport->qos.enabled ||
                            evport->qos.group != group || evport->qos.min_rate < max_guarantee)
                                continue;
                        max_guarantee = evport->qos.min_rate;
                }
        }

        if (max_guarantee)
                return max_t(u32, max_guarantee / fw_max_bw_share, 1);

        /* If the vports' min rate divider would be 0 but their group has
         * bw_share configured, set the vports' bw_share to the minimal value.
         */
        if (!group_level && !max_guarantee && group->bw_share)
                return 1;
        return 0;
}

static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
        if (divider)
                return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);

        return 0;
}

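/* Recompute bw_share for every enabled vport in @group from its min_rate and
 * the current divider, and push only the values that actually changed to
 * firmware.
 */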
static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
                                             struct mlx5_esw_rate_group *group,
                                             struct netlink_ext_ack *extack)
{
        u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
        u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
        struct mlx5_vport *evport;
        unsigned long i;
        u32 bw_share;
        int err;

        mlx5_esw_for_each_vport(esw, i, evport) {
                if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
                        continue;
                bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);

                if (bw_share == evport->qos.bw_share)
                        continue;

                err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
                if (err)
                        return err;

                evport->qos.bw_share = bw_share;
        }

        return 0;
}

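/* Same idea one level up: recompute bw_share for every rate group from its
 * min_rate and @divider, then renormalize the member vports of each group
 * whose bw_share changed.
 */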
static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
                                             struct netlink_ext_ack *extack)
{
        u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
        struct mlx5_esw_rate_group *group;
        u32 bw_share;
        int err;

        list_for_each_entry(group, &esw->qos.groups, list) {
                bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);

                if (bw_share == group->bw_share)
                        continue;

                err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
                if (err)
                        return err;

                group->bw_share = bw_share;

                /* All the group's vports need to be set with a default
                 * bw_share to enable them with QoS.
                 */
                err = esw_qos_normalize_vports_min_rate(esw, group, extack);
                if (err)
                        return err;
        }

        return 0;
}

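/* Update a vport's min TX rate (Mbps) and renormalize the bw_share of all
 * vports in the same group. Rolls the value back if normalization fails.
 */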
int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
                                    struct mlx5_vport *evport,
                                    u32 min_rate,
                                    struct netlink_ext_ack *extack)
{
        u32 fw_max_bw_share, previous_min_rate;
        bool min_rate_supported;
        int err;

        lockdep_assert_held(&esw->state_lock);
        fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
        min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
                                fw_max_bw_share >= MLX5_MIN_BW_SHARE;
        if (min_rate && !min_rate_supported)
                return -EOPNOTSUPP;
        if (min_rate == evport->qos.min_rate)
                return 0;

        previous_min_rate = evport->qos.min_rate;
        evport->qos.min_rate = min_rate;
        err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
        if (err)
                evport->qos.min_rate = previous_min_rate;

        return err;
}

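/* Update a vport's max TX rate (Mbps). A max_rate of 0 means "unlimited", in
 * which case the vport falls back to its group's limit, if any.
 */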
int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
                                    struct mlx5_vport *evport,
                                    u32 max_rate,
                                    struct netlink_ext_ack *extack)
{
        u32 act_max_rate = max_rate;
        bool max_rate_supported;
        int err;

        lockdep_assert_held(&esw->state_lock);
        max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

        if (max_rate && !max_rate_supported)
                return -EOPNOTSUPP;
        if (max_rate == evport->qos.max_rate)
                return 0;

        /* If the parent group has a rate limit, the vport must be set to the
         * group's value when the new max rate is 0 (unlimited).
         */
        if (evport->qos.group && !max_rate)
                act_max_rate = evport->qos.group->max_rate;

        err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
        if (!err)
                evport->qos.max_rate = max_rate;

        return err;
}

static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
                                      u32 min_rate, struct netlink_ext_ack *extack)
{
        u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
        struct mlx5_core_dev *dev = esw->dev;
        u32 previous_min_rate, divider;
        int err;

        if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
                return -EOPNOTSUPP;

        if (min_rate == group->min_rate)
                return 0;

        previous_min_rate = group->min_rate;
        group->min_rate = min_rate;
        divider = esw_qos_calculate_min_rate_divider(esw, group, true);
        err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
        if (err) {
                group->min_rate = previous_min_rate;
                NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

                /* Attempt restoring previous configuration */
                divider = esw_qos_calculate_min_rate_divider(esw, group, true);
                if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
                        NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
        }

        return err;
}

static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
                                      struct mlx5_esw_rate_group *group,
                                      u32 max_rate, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport;
        unsigned long i;
        int err;

        if (group->max_rate == max_rate)
                return 0;

        err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
        if (err)
                return err;

        group->max_rate = max_rate;

        /* Any unlimited vports in the group should be set
         * with the value of the group.
         */
        mlx5_esw_for_each_vport(esw, i, vport) {
                if (!vport->enabled || !vport->qos.enabled ||
                    vport->qos.group != group || vport->qos.max_rate)
                        continue;

                err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
                if (err)
                        NL_SET_ERR_MSG_MOD(extack,
                                           "E-Switch vport implicit rate limit setting failed");
        }

        return err;
}

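/* Create the firmware scheduling element for a vport under its group's TSAR
 * (or the root TSAR when it has no group), programming the initial max rate
 * and bw_share in the same command.
 */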
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
                                              struct mlx5_vport *vport,
                                              u32 max_rate, u32 bw_share)
{
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_esw_rate_group *group = vport->qos.group;
        struct mlx5_core_dev *dev = esw->dev;
        u32 parent_tsar_ix;
        void *vport_elem;
        int err;

        parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
        MLX5_SET(scheduling_context, sched_ctx, element_type,
                 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
        vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
        MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
        MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
        MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
        MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

        err = mlx5_create_scheduling_element_cmd(dev,
                                                 SCHEDULING_HIERARCHY_E_SWITCH,
                                                 sched_ctx,
                                                 &vport->qos.esw_tsar_ix);
        if (err) {
                esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
                         vport->vport, err);
                return err;
        }

        return 0;
}

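/* Move a vport between groups by destroying its scheduling element and
 * recreating it under the new group's TSAR. On failure, recreate it under the
 * old group so the vport is never left unscheduled.
 */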
static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
                                                   struct mlx5_vport *vport,
                                                   struct mlx5_esw_rate_group *curr_group,
                                                   struct mlx5_esw_rate_group *new_group,
                                                   struct netlink_ext_ack *extack)
{
        u32 max_rate;
        int err;

        err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  vport->qos.esw_tsar_ix);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
                return err;
        }

        vport->qos.group = new_group;
        max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;

        /* If the vport is unlimited, we take the group's value. Therefore,
         * if the group is limited it will apply to the vport as well, and if
         * not, the vport will remain unlimited.
         */
        err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
                goto err_sched;
        }

        return 0;

err_sched:
        vport->qos.group = curr_group;
        max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
        if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
                esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
                         vport->vport);

        return err;
}

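/* Attach @vport to @group (or back to the default group0 when @group is
 * NULL), then renormalize the bw_share weights of the old and new groups as
 * needed.
 */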
static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
                                      struct mlx5_vport *vport,
                                      struct mlx5_esw_rate_group *group,
                                      struct netlink_ext_ack *extack)
{
        struct mlx5_esw_rate_group *new_group, *curr_group;
        int err;

        if (!vport->enabled)
                return -EINVAL;

        curr_group = vport->qos.group;
        new_group = group ?: esw->qos.group0;
        if (curr_group == new_group)
                return 0;

        err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
        if (err)
                return err;

        /* Recalculate bw share weights of old and new groups */
        if (vport->qos.bw_share) {
                esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
                esw_qos_normalize_vports_min_rate(esw, new_group, extack);
        }

        return 0;
}

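/* Allocate a new rate group, create its TSAR under the root TSAR and
 * renormalize the existing groups' bw_share, unwinding on any failure.
 */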
static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
        u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_esw_rate_group *group;
        u32 divider;
        int err;

        if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
                return ERR_PTR(-EOPNOTSUPP);

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group)
                return ERR_PTR(-ENOMEM);

        MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
                 esw->qos.root_tsar_ix);
        err = mlx5_create_scheduling_element_cmd(esw->dev,
                                                 SCHEDULING_HIERARCHY_E_SWITCH,
                                                 tsar_ctx,
                                                 &group->tsar_ix);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
                goto err_sched_elem;
        }

        list_add_tail(&group->list, &esw->qos.groups);

        divider = esw_qos_calculate_min_rate_divider(esw, group, true);
        if (divider) {
                err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
                if (err) {
                        NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
                        goto err_min_rate;
                }
        }
        trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);

        return group;

err_min_rate:
        list_del(&group->list);
        if (mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                SCHEDULING_HIERARCHY_E_SWITCH,
                                                group->tsar_ix))
                NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
err_sched_elem:
        kfree(group);
        return ERR_PTR(err);
}

static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
                                      struct mlx5_esw_rate_group *group,
                                      struct netlink_ext_ack *extack)
{
        u32 divider;
        int err;

        list_del(&group->list);

        divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
        err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
        if (err)
                NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");

        err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  group->tsar_ix);
        if (err)
                NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");

        trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
        kfree(group);
        return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
        switch (type) {
        case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
                return MLX5_CAP_QOS(dev, esw_element_type) &
                       ELEMENT_TYPE_CAP_MASK_TASR;
        case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
                return MLX5_CAP_QOS(dev, esw_element_type) &
                       ELEMENT_TYPE_CAP_MASK_VPORT;
        case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
                return MLX5_CAP_QOS(dev, esw_element_type) &
                       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
        case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
                return MLX5_CAP_QOS(dev, esw_element_type) &
                       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
        }
        return false;
}

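/* Enable e-switch QoS: create the root DWRR TSAR and, when the firmware
 * supports nested scheduling (log_esw_max_sched_depth), an implicit default
 * group (group0) under it that all vports start in.
 */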
void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
{
        u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_core_dev *dev = esw->dev;
        __be32 *attr;
        int err;

        if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
                return;

        if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
                return;

        mutex_lock(&esw->state_lock);
        if (esw->qos.enabled)
                goto unlock;

        MLX5_SET(scheduling_context, tsar_ctx, element_type,
                 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

        attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
        *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

        err = mlx5_create_scheduling_element_cmd(dev,
                                                 SCHEDULING_HIERARCHY_E_SWITCH,
                                                 tsar_ctx,
                                                 &esw->qos.root_tsar_ix);
        if (err) {
                esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
                goto unlock;
        }

        INIT_LIST_HEAD(&esw->qos.groups);
        if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
                esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
                if (IS_ERR(esw->qos.group0)) {
                        esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
                                 PTR_ERR(esw->qos.group0));
                        goto err_group0;
                }
        }
        esw->qos.enabled = true;
unlock:
        mutex_unlock(&esw->state_lock);
        return;

err_group0:
        err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  esw->qos.root_tsar_ix);
        if (err)
                esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
        mutex_unlock(&esw->state_lock);
}

void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
{
        struct devlink *devlink = priv_to_devlink(esw->dev);
        int err;

        devlink_rate_nodes_destroy(devlink);
        mutex_lock(&esw->state_lock);
        if (!esw->qos.enabled)
                goto unlock;

        if (esw->qos.group0)
                esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);

        err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  esw->qos.root_tsar_ix);
        if (err)
                esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);

        esw->qos.enabled = false;
unlock:
        mutex_unlock(&esw->state_lock);
}

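/* Per-vport QoS enable: attach the vport to the default group0 (NULL when
 * nested scheduling is unsupported) and create its scheduling element.
 * Returns 0 without doing anything when e-switch QoS as a whole is disabled.
 */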
int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
                              u32 max_rate, u32 bw_share)
{
        int err;

        lockdep_assert_held(&esw->state_lock);
        if (!esw->qos.enabled)
                return 0;

        if (vport->qos.enabled)
                return -EEXIST;

        vport->qos.group = esw->qos.group0;

        err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
        if (!err) {
                vport->qos.enabled = true;
                trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
        }

        return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
        int err;

        lockdep_assert_held(&esw->state_lock);
        if (!esw->qos.enabled || !vport->qos.enabled)
                return;
        WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
             "Disabling QoS on port before detaching it from group");

        err = mlx5_destroy_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  vport->qos.esw_tsar_ix);
        if (err)
                esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
                         vport->vport, err);

        vport->qos.enabled = false;
        trace_mlx5_esw_vport_qos_destroy(vport);
}

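/* Narrow helper for callers that only need to retune an enabled vport's
 * max_average_bw, without touching bw_share or its group membership.
 */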
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
        u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_vport *vport;
        u32 bitmask;

        vport = mlx5_eswitch_get_vport(esw, vport_num);
        if (IS_ERR(vport))
                return PTR_ERR(vport);

        if (!vport->qos.enabled)
                return -EOPNOTSUPP;

        MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
        bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;

        return mlx5_modify_scheduling_element_cmd(esw->dev,
                                                  SCHEDULING_HIERARCHY_E_SWITCH,
                                                  ctx,
                                                  vport->qos.esw_tsar_ix,
                                                  bitmask);
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
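
/* Worked example: 1 Mbps = 1,000,000 bits/s = 125,000 bytes/s, so a devlink
 * rate of 1,250,000,000 Bps divides evenly into 10,000 Mbps (10 Gbps), while
 * e.g. 125,500 Bps leaves a remainder and is rejected below.
 */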

/* Converts a bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place. Returns an error if the converted
 * rate exceeds the link speed or is not a whole number of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
                                        u64 *rate, struct netlink_ext_ack *extack)
{
        u32 link_speed_max, remainder;
        u64 value;
        int err;

        err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
                return err;
        }

        value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
        if (remainder) {
                pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
                       name, *rate);
                NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
                return -EINVAL;
        }

        if (value > link_speed_max) {
                pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
                       name, value, link_speed_max);
                NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
                return -EINVAL;
        }

        *rate = value;
        return 0;
}

/* Eswitch devlink rate API */

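/* The devlink mapping used throughout this section: a devlink rate leaf is a
 * vport (priv is a struct mlx5_vport) and a devlink rate node is a rate
 * group (priv is a struct mlx5_esw_rate_group).
 */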
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
                                            u64 tx_share, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
        if (err)
                return err;

        mutex_lock(&esw->state_lock);
        err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
        if (err)
                return err;

        mutex_lock(&esw->state_lock);
        err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
                                            u64 tx_share, struct netlink_ext_ack *extack)
{
        struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5_esw_rate_group *group = priv;
        int err;

        err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
        if (err)
                return err;

        mutex_lock(&esw->state_lock);
        err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack)
{
        struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5_esw_rate_group *group = priv;
        int err;

        err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
        if (err)
                return err;

        mutex_lock(&esw->state_lock);
        err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
                                   struct netlink_ext_ack *extack)
{
        struct mlx5_esw_rate_group *group;
        struct mlx5_eswitch *esw;
        int err = 0;

        esw = mlx5_devlink_eswitch_get(rate_node->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);

        mutex_lock(&esw->state_lock);
        if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Rate node creation supported only in switchdev mode");
                err = -EOPNOTSUPP;
                goto unlock;
        }

        group = esw_qos_create_rate_group(esw, extack);
        if (IS_ERR(group)) {
                err = PTR_ERR(group);
                goto unlock;
        }

        *priv = group;
unlock:
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
                                   struct netlink_ext_ack *extack)
{
        struct mlx5_esw_rate_group *group = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = mlx5_devlink_eswitch_get(rate_node->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);

        mutex_lock(&esw->state_lock);
        err = esw_qos_destroy_rate_group(esw, group, extack);
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
                                    struct mlx5_vport *vport,
                                    struct mlx5_esw_rate_group *group,
                                    struct netlink_ext_ack *extack)
{
        int err;

        mutex_lock(&esw->state_lock);
        err = esw_qos_vport_update_group(esw, vport, group, extack);
        mutex_unlock(&esw->state_lock);
        return err;
}

int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
                                     struct devlink_rate *parent,
                                     void *priv, void *parent_priv,
                                     struct netlink_ext_ack *extack)
{
        struct mlx5_esw_rate_group *group;
        struct mlx5_vport *vport = priv;

        if (!parent)
                return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
                                                       vport, NULL, extack);

        group = parent_priv;
        return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
}