linux/drivers/infiniband/core/counters.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
 */
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)

static int __counter_set_mode(struct rdma_counter_mode *curr,
                              enum rdma_nl_counter_mode new_mode,
                              enum rdma_nl_counter_mask new_mask)
{
        if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
            ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
             (curr->mode != RDMA_COUNTER_MODE_NONE)))
                return -EINVAL;

        curr->mode = new_mode;
        curr->mask = new_mask;
        return 0;
}

/**
 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
 * @dev: Device to operate on
 * @port: Port number on @dev
 * @on: True to turn auto mode on, false to turn it off
 * @mask: Auto mode criteria mask (e.g. RDMA_COUNTER_MASK_QP_TYPE)
 *
 * When @on is true, @mask must be set. When @on is false, the port falls
 * back to manual mode if any counters are still allocated, so that the
 * user can keep accessing them; otherwise it returns to none mode.
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
                               bool on, enum rdma_nl_counter_mask mask)
{
        struct rdma_port_counter *port_counter;
        int ret;

        port_counter = &dev->port_data[port].port_counter;
        if (!port_counter->hstats)
                return -EOPNOTSUPP;

        mutex_lock(&port_counter->lock);
        if (on) {
                ret = __counter_set_mode(&port_counter->mode,
                                         RDMA_COUNTER_MODE_AUTO, mask);
        } else {
                if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
                        ret = -EINVAL;
                        goto out;
                }

                if (port_counter->num_counters)
                        ret = __counter_set_mode(&port_counter->mode,
                                                 RDMA_COUNTER_MODE_MANUAL, 0);
                else
                        ret = __counter_set_mode(&port_counter->mode,
                                                 RDMA_COUNTER_MODE_NONE, 0);
        }

out:
        mutex_unlock(&port_counter->lock);
        return ret;
}
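
/*
 * Illustrative sketch (not part of this file): a configuration path such as
 * the rdma netlink layer is assumed to enable auto mode on a port roughly
 * like this, grouping QPs per QP type:
 *
 *      err = rdma_counter_set_auto_mode(dev, port, true,
 *                                       RDMA_COUNTER_MASK_QP_TYPE);
 *
 * and to disable it again with:
 *
 *      err = rdma_counter_set_auto_mode(dev, port, false, 0);
 *
 * Disabling falls back to manual mode while counters are still allocated,
 * and to none mode once the last one is freed.
 */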

static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
                                               enum rdma_nl_counter_mode mode)
{
        struct rdma_port_counter *port_counter;
        struct rdma_counter *counter;
        int ret;

        if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
                return NULL;

        counter = kzalloc(sizeof(*counter), GFP_KERNEL);
        if (!counter)
                return NULL;

        counter->device    = dev;
        counter->port      = port;
        counter->res.type  = RDMA_RESTRACK_COUNTER;
        counter->stats     = dev->ops.counter_alloc_stats(counter);
        if (!counter->stats)
                goto err_stats;

        port_counter = &dev->port_data[port].port_counter;
        mutex_lock(&port_counter->lock);
        if (mode == RDMA_COUNTER_MODE_MANUAL) {
                ret = __counter_set_mode(&port_counter->mode,
                                         RDMA_COUNTER_MODE_MANUAL, 0);
                if (ret)
                        goto err_mode;
        }

        port_counter->num_counters++;
        mutex_unlock(&port_counter->lock);

        counter->mode.mode = mode;
        kref_init(&counter->kref);
        mutex_init(&counter->lock);

        return counter;

err_mode:
        mutex_unlock(&port_counter->lock);
        kfree(counter->stats);
err_stats:
        kfree(counter);
        return NULL;
}

static void rdma_counter_free(struct rdma_counter *counter)
{
        struct rdma_port_counter *port_counter;

        port_counter = &counter->device->port_data[counter->port].port_counter;
        mutex_lock(&port_counter->lock);
        port_counter->num_counters--;
        if (!port_counter->num_counters &&
            (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
                __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
                                   0);

        mutex_unlock(&port_counter->lock);

        rdma_restrack_del(&counter->res);
        kfree(counter->stats);
        kfree(counter);
}

static void auto_mode_init_counter(struct rdma_counter *counter,
                                   const struct ib_qp *qp,
                                   enum rdma_nl_counter_mask new_mask)
{
        struct auto_mode_param *param = &counter->mode.param;

        counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
        counter->mode.mask = new_mask;

        if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
                param->qp_type = qp->qp_type;
}

static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
                            enum rdma_nl_counter_mask auto_mask)
{
        struct auto_mode_param *param = &counter->mode.param;
        bool match = true;

        if (!rdma_is_visible_in_pid_ns(&qp->res))
                return false;

        /* Ensure that counter belongs to the right PID */
        if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task))
                return false;

        if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
                match &= (param->qp_type == qp->qp_type);

        return match;
}

static int __rdma_counter_bind_qp(struct rdma_counter *counter,
                                  struct ib_qp *qp)
{
        int ret;

        if (qp->counter)
                return -EINVAL;

        if (!qp->device->ops.counter_bind_qp)
                return -EOPNOTSUPP;

        mutex_lock(&counter->lock);
        ret = qp->device->ops.counter_bind_qp(counter, qp);
        mutex_unlock(&counter->lock);

        return ret;
}

static int __rdma_counter_unbind_qp(struct ib_qp *qp)
{
        struct rdma_counter *counter = qp->counter;
        int ret;

        if (!qp->device->ops.counter_unbind_qp)
                return -EOPNOTSUPP;

        mutex_lock(&counter->lock);
        ret = qp->device->ops.counter_unbind_qp(qp);
        mutex_unlock(&counter->lock);

        return ret;
}

static void counter_history_stat_update(const struct rdma_counter *counter)
{
        struct ib_device *dev = counter->device;
        struct rdma_port_counter *port_counter;
        int i;

        port_counter = &dev->port_data[counter->port].port_counter;
        if (!port_counter->hstats)
                return;

        for (i = 0; i < counter->stats->num_counters; i++)
                port_counter->hstats->value[i] += counter->stats->value[i];
}

/**
 * rdma_get_counter_auto_mode - Find the counter to which @qp should be
 *     bound in auto mode
 *
 * Return: The counter (with its ref-count increased) if found, NULL otherwise
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
                                                       u8 port)
{
        struct rdma_port_counter *port_counter;
        struct rdma_counter *counter = NULL;
        struct ib_device *dev = qp->device;
        struct rdma_restrack_entry *res;
        struct rdma_restrack_root *rt;
        unsigned long id = 0;

        port_counter = &dev->port_data[port].port_counter;
        rt = &dev->res[RDMA_RESTRACK_COUNTER];
        xa_lock(&rt->xa);
        xa_for_each(&rt->xa, id, res) {
                if (!rdma_is_visible_in_pid_ns(res))
                        continue;

                counter = container_of(res, struct rdma_counter, res);
                if ((counter->device != qp->device) || (counter->port != port))
                        goto next;

                if (auto_mode_match(qp, counter, port_counter->mode.mask))
                        break;
next:
                counter = NULL;
        }

        if (counter && !kref_get_unless_zero(&counter->kref))
                counter = NULL;

        xa_unlock(&rt->xa);
        return counter;
}

static void rdma_counter_res_add(struct rdma_counter *counter,
                                 struct ib_qp *qp)
{
        if (rdma_is_kernel_res(&qp->res)) {
                rdma_restrack_set_task(&counter->res, qp->res.kern_name);
                rdma_restrack_kadd(&counter->res);
        } else {
                rdma_restrack_attach_task(&counter->res, qp->res.task);
                rdma_restrack_uadd(&counter->res);
        }
}

static void counter_release(struct kref *kref)
{
        struct rdma_counter *counter;

        counter = container_of(kref, struct rdma_counter, kref);
        counter_history_stat_update(counter);
        counter->device->ops.counter_dealloc(counter);
        rdma_counter_free(counter);
}

/**
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
 *   the auto-mode rule
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
{
        struct rdma_port_counter *port_counter;
        struct ib_device *dev = qp->device;
        struct rdma_counter *counter;
        int ret;

        if (!rdma_is_port_valid(dev, port))
                return -EINVAL;

        port_counter = &dev->port_data[port].port_counter;
        if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
                return 0;

        counter = rdma_get_counter_auto_mode(qp, port);
        if (counter) {
                ret = __rdma_counter_bind_qp(counter, qp);
                if (ret) {
                        kref_put(&counter->kref, counter_release);
                        return ret;
                }
        } else {
                counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
                if (!counter)
                        return -ENOMEM;

                auto_mode_init_counter(counter, qp, port_counter->mode.mask);

                ret = __rdma_counter_bind_qp(counter, qp);
                if (ret) {
                        rdma_counter_free(counter);
                        return ret;
                }

                rdma_counter_res_add(counter, qp);
        }

        return 0;
}
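
/*
 * Illustrative sketch (not part of this file): the QP modify path in the core
 * is assumed to invoke the auto-binding check when a QP becomes active on a
 * port, e.g.:
 *
 *      ret = rdma_counter_bind_qp_auto(qp, qp->port);
 *      if (ret)
 *              return ret;
 *
 * The call is a no-op (returns 0) when the port is not in auto mode.
 */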

/**
 * rdma_counter_unbind_qp - Unbind a QP from its counter
 * @qp: The QP to unbind
 * @force:
 *   true - Decrease the counter ref-count even if the driver unbind
 *   fails (e.g., during QP destroy)
 */
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
{
        struct rdma_counter *counter = qp->counter;
        int ret;

        if (!counter)
                return -EINVAL;

        ret = __rdma_counter_unbind_qp(qp);
        if (ret && !force)
                return ret;

        kref_put(&counter->kref, counter_release);
        return 0;
}
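
/*
 * Illustrative sketch (not part of this file): on QP destroy the core is
 * assumed to drop any counter binding unconditionally, so the counter
 * reference is released even if the driver unbind callback fails:
 *
 *      if (qp->counter)
 *              rdma_counter_unbind_qp(qp, true);
 */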

int rdma_counter_query_stats(struct rdma_counter *counter)
{
        struct ib_device *dev = counter->device;
        int ret;

        if (!dev->ops.counter_update_stats)
                return -EINVAL;

        mutex_lock(&counter->lock);
        ret = dev->ops.counter_update_stats(counter);
        mutex_unlock(&counter->lock);

        return ret;
}

static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
                                           u8 port, u32 index)
{
        struct rdma_restrack_entry *res;
        struct rdma_restrack_root *rt;
        struct rdma_counter *counter;
        unsigned long id = 0;
        u64 sum = 0;

        rt = &dev->res[RDMA_RESTRACK_COUNTER];
        xa_lock(&rt->xa);
        xa_for_each(&rt->xa, id, res) {
                if (!rdma_restrack_get(res))
                        continue;

                xa_unlock(&rt->xa);

                counter = container_of(res, struct rdma_counter, res);
                if ((counter->device != dev) || (counter->port != port) ||
                    rdma_counter_query_stats(counter))
                        goto next;

                sum += counter->stats->value[index];

next:
                xa_lock(&rt->xa);
                rdma_restrack_put(res);
        }

        xa_unlock(&rt->xa);
        return sum;
}

/**
 * rdma_counter_get_hwstat_value() - Get the sum of hw stat @index over all
 *   counters on a specific port, including both the running counters and
 *   the history data of counters that have already been released
 */
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
{
        struct rdma_port_counter *port_counter;
        u64 sum;

        port_counter = &dev->port_data[port].port_counter;
        if (!port_counter->hstats)
                return 0;

        sum = get_running_counters_hwstat_sum(dev, port, index);
        sum += port_counter->hstats->value[index];

        return sum;
}
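
/*
 * Illustrative sketch (not part of this file): a hw stats read for one entry
 * of a port's counter set is assumed to reduce to a single call, e.g. when
 * filling stats->value[i] for a sysfs or netlink dump:
 *
 *      stats->value[i] = rdma_counter_get_hwstat_value(dev, port, i);
 */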

static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
{
        struct rdma_restrack_entry *res = NULL;
        struct ib_qp *qp = NULL;

        res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
        if (IS_ERR(res))
                return NULL;

        if (!rdma_is_visible_in_pid_ns(res))
                goto err;

        qp = container_of(res, struct ib_qp, res);
        if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                goto err;

        return qp;

err:
        rdma_restrack_put(res);
        return NULL;
}

static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
                                       struct ib_qp *qp)
{
        if ((counter->device != qp->device) || (counter->port != qp->port))
                return -EINVAL;

        return __rdma_counter_bind_qp(counter, qp);
}

static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
                                                   u32 counter_id)
{
        struct rdma_restrack_entry *res;
        struct rdma_counter *counter;

        res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
        if (IS_ERR(res))
                return NULL;

        if (!rdma_is_visible_in_pid_ns(res)) {
                rdma_restrack_put(res);
                return NULL;
        }

        counter = container_of(res, struct rdma_counter, res);
        kref_get(&counter->kref);
        rdma_restrack_put(res);

        return counter;
}

/**
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
                          u32 qp_num, u32 counter_id)
{
        struct rdma_counter *counter;
        struct ib_qp *qp;
        int ret;

        qp = rdma_counter_get_qp(dev, qp_num);
        if (!qp)
                return -ENOENT;

        counter = rdma_get_counter_by_id(dev, counter_id);
        if (!counter) {
                ret = -ENOENT;
                goto err;
        }

        if (counter->res.task != qp->res.task) {
                ret = -EINVAL;
                goto err_task;
        }

        ret = rdma_counter_bind_qp_manual(counter, qp);
        if (ret)
                goto err_task;

        rdma_restrack_put(&qp->res);
        return 0;

err_task:
        kref_put(&counter->kref, counter_release);
err:
        rdma_restrack_put(&qp->res);
        return ret;
}
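
/*
 * Illustrative sketch (not part of this file): binding an existing counter to
 * another QP in manual mode is assumed to look like this, with both ids
 * supplied by the user:
 *
 *      ret = rdma_counter_bind_qpn(dev, port, qp_num, counter_id);
 */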

/**
 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
 *   The id of the new counter is returned in @counter_id
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
                                u32 qp_num, u32 *counter_id)
{
        struct rdma_counter *counter;
        struct ib_qp *qp;
        int ret;

        if (!rdma_is_port_valid(dev, port))
                return -EINVAL;

        if (!dev->port_data[port].port_counter.hstats)
                return -EOPNOTSUPP;

        qp = rdma_counter_get_qp(dev, qp_num);
        if (!qp)
                return -ENOENT;

        if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
                ret = -EINVAL;
                goto err;
        }

        counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
        if (!counter) {
                ret = -ENOMEM;
                goto err;
        }

        ret = rdma_counter_bind_qp_manual(counter, qp);
        if (ret)
                goto err_bind;

        if (counter_id)
                *counter_id = counter->id;

        rdma_counter_res_add(counter, qp);

        rdma_restrack_put(&qp->res);
        return ret;

err_bind:
        rdma_counter_free(counter);
err:
        rdma_restrack_put(&qp->res);
        return ret;
}
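
/*
 * Illustrative sketch (not part of this file): when the user asks for a brand
 * new counter, allocation and binding are assumed to happen in one step:
 *
 *      u32 cntn;
 *
 *      ret = rdma_counter_bind_qpn_alloc(dev, port, qp_num, &cntn);
 *
 * On success the id of the new counter is returned in cntn and can be
 * reported back to the user.
 */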

/**
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
                            u32 qp_num, u32 counter_id)
{
        struct rdma_port_counter *port_counter;
        struct ib_qp *qp;
        int ret;

        if (!rdma_is_port_valid(dev, port))
                return -EINVAL;

        qp = rdma_counter_get_qp(dev, qp_num);
        if (!qp)
                return -ENOENT;

        if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
                ret = -EINVAL;
                goto out;
        }

        port_counter = &dev->port_data[port].port_counter;
        if (!qp->counter || qp->counter->id != counter_id ||
            port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
                ret = -EINVAL;
                goto out;
        }

        ret = rdma_counter_unbind_qp(qp, false);

out:
        rdma_restrack_put(&qp->res);
        return ret;
}
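
/*
 * Illustrative sketch (not part of this file): the reverse operation is
 * assumed to take the same user-supplied ids and only succeeds while the
 * port is in manual mode:
 *
 *      ret = rdma_counter_unbind_qpn(dev, port, qp_num, counter_id);
 */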

int rdma_counter_get_mode(struct ib_device *dev, u8 port,
                          enum rdma_nl_counter_mode *mode,
                          enum rdma_nl_counter_mask *mask)
{
        struct rdma_port_counter *port_counter;

        port_counter = &dev->port_data[port].port_counter;
        *mode = port_counter->mode.mode;
        *mask = port_counter->mode.mask;

        return 0;
}

void rdma_counter_init(struct ib_device *dev)
{
        struct rdma_port_counter *port_counter;
        u32 port;

        if (!dev->port_data)
                return;

        rdma_for_each_port(dev, port) {
                port_counter = &dev->port_data[port].port_counter;
                port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
                mutex_init(&port_counter->lock);

                if (!dev->ops.alloc_hw_stats)
                        continue;

                port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
                if (!port_counter->hstats)
                        goto fail;
        }

        return;

fail:
        rdma_for_each_port(dev, port) {
                port_counter = &dev->port_data[port].port_counter;
                kfree(port_counter->hstats);
                port_counter->hstats = NULL;
        }

        return;
}

void rdma_counter_release(struct ib_device *dev)
{
        struct rdma_port_counter *port_counter;
        u32 port;

        rdma_for_each_port(dev, port) {
                port_counter = &dev->port_data[port].port_counter;
                kfree(port_counter->hstats);
        }
}