linux/drivers/staging/lustre/lustre/ptlrpc/nrs.c
<<
>>
Prefs
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License version 2 for more details.  A copy is
  14 * included in the COPYING file that accompanied this code.
  15
  16 * GPL HEADER END
  17 */
  18/*
  19 * Copyright (c) 2011 Intel Corporation
  20 *
  21 * Copyright 2012 Xyratex Technology Limited
  22 */
  23/*
  24 * lustre/ptlrpc/nrs.c
  25 *
  26 * Network Request Scheduler (NRS)
  27 *
  28 * Allows to reorder the handling of RPCs at servers.
  29 *
  30 * Author: Liang Zhen <liang@whamcloud.com>
  31 * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
  32 */
  33/**
   34 * \addtogroup nrs
  35 * @{
  36 */
  37
  38#define DEBUG_SUBSYSTEM S_RPC
  39#include "../include/obd_support.h"
  40#include "../include/obd_class.h"
  41#include "../include/lustre_net.h"
  42#include "../include/lprocfs_status.h"
  43#include "../../include/linux/libcfs/libcfs.h"
  44#include "ptlrpc_internal.h"
  45
   46/**
   47 * NRS core object.
      *
      * Module-wide NRS state. Code in this file walks nrs_core.nrs_policies to
      * find the policy descriptors to register on an NRS head, and asserts that
      * nrs_core.nrs_mutex is held while NRS heads are being set up.
   48 */
   49struct nrs_core nrs_core;
  50
  51static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
  52{
  53        return policy->pol_desc->pd_ops->op_policy_init ?
  54               policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
  55}
  56
  57static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
  58{
  59        LASSERT(policy->pol_ref == 0);
  60        LASSERT(policy->pol_req_queued == 0);
  61
  62        if (policy->pol_desc->pd_ops->op_policy_fini)
  63                policy->pol_desc->pd_ops->op_policy_fini(policy);
  64}
  65
  66static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
  67                                 enum ptlrpc_nrs_ctl opc, void *arg)
  68{
  69        /**
  70         * The policy may be stopped, but the lprocfs files and
  71         * ptlrpc_nrs_policy instances remain present until unregistration time.
  72         * Do not perform the ctl operation if the policy is stopped, as
  73         * policy->pol_private will be NULL in such a case.
  74         */
  75        if (policy->pol_state == NRS_POL_STATE_STOPPED)
  76                return -ENODEV;
  77
  78        return policy->pol_desc->pd_ops->op_policy_ctl ?
  79               policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
  80               -ENOSYS;
  81}
  82
  83static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
  84{
  85        struct ptlrpc_nrs *nrs = policy->pol_nrs;
  86
  87        if (policy->pol_desc->pd_ops->op_policy_stop) {
  88                spin_unlock(&nrs->nrs_lock);
  89
  90                policy->pol_desc->pd_ops->op_policy_stop(policy);
  91
  92                spin_lock(&nrs->nrs_lock);
  93        }
  94
  95        LASSERT(list_empty(&policy->pol_list_queued));
  96        LASSERT(policy->pol_req_queued == 0 &&
  97                policy->pol_req_started == 0);
  98
  99        policy->pol_private = NULL;
 100
 101        policy->pol_state = NRS_POL_STATE_STOPPED;
 102
 103        if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
 104                module_put(policy->pol_desc->pd_owner);
 105}
 106
 107static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
 108{
 109        struct ptlrpc_nrs *nrs = policy->pol_nrs;
 110
 111        if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
 112                return -EPERM;
 113
 114        if (policy->pol_state == NRS_POL_STATE_STARTING)
 115                return -EAGAIN;
 116
 117        /* In progress or already stopped */
 118        if (policy->pol_state != NRS_POL_STATE_STARTED)
 119                return 0;
 120
 121        policy->pol_state = NRS_POL_STATE_STOPPING;
 122
 123        /* Immediately make it invisible */
 124        if (nrs->nrs_policy_primary == policy) {
 125                nrs->nrs_policy_primary = NULL;
 126
 127        } else {
 128                LASSERT(nrs->nrs_policy_fallback == policy);
 129                nrs->nrs_policy_fallback = NULL;
 130        }
 131
 132        /* I have the only refcount */
 133        if (policy->pol_ref == 1)
 134                nrs_policy_stop0(policy);
 135
 136        return 0;
 137}
 138
 139/**
 140 * Transitions the \a nrs NRS head's primary policy to
 141 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
 142 * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
 143 *
 144 * \param[in] nrs the NRS head to carry out this operation on
 145 */
 146static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
 147{
 148        struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
 149
 150        if (!tmp)
 151                return;
 152
 153        nrs->nrs_policy_primary = NULL;
 154
 155        LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
 156        tmp->pol_state = NRS_POL_STATE_STOPPING;
 157
 158        if (tmp->pol_ref == 0)
 159                nrs_policy_stop0(tmp);
 160}
 161
 162/**
 163 * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
 164 * response to an lprocfs command to start a policy.
 165 *
 166 * If a primary policy different to the current one is specified, this function
 167 * will transition the new policy to the
 168 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
 169 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
 170 * the old primary policy (if there is one) to
 171 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
 172 * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
 173 *
 174 * If the fallback policy is specified, this is taken to indicate an instruction
 175 * to stop the current primary policy, without substituting it with another
 176 * primary policy, so the primary policy (if any) is transitioned to
 177 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
 178 * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
 179 * this case, the fallback policy is only left active in the NRS head.
 180 */
 181static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
 182{
 183        struct ptlrpc_nrs *nrs = policy->pol_nrs;
 184        int rc = 0;
 185
 186        /**
 187         * Don't allow multiple starting which is too complex, and has no real
 188         * benefit.
 189         */
 190        if (nrs->nrs_policy_starting)
 191                return -EAGAIN;
 192
 193        LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
 194
 195        if (policy->pol_state == NRS_POL_STATE_STOPPING)
 196                return -EAGAIN;
 197
 198        if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
 199                /**
 200                 * This is for cases in which the user sets the policy to the
 201                 * fallback policy (currently fifo for all services); i.e. the
 202                 * user is resetting the policy to the default; so we stop the
 203                 * primary policy, if any.
 204                 */
 205                if (policy == nrs->nrs_policy_fallback) {
 206                        nrs_policy_stop_primary(nrs);
 207                        return 0;
 208                }
 209
 210                /**
 211                 * If we reach here, we must be setting up the fallback policy
 212                 * at service startup time, and only a single policy with the
 213                 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
 214                 * register with NRS core.
 215                 */
 216                LASSERT(!nrs->nrs_policy_fallback);
 217        } else {
 218                /**
 219                 * Shouldn't start primary policy if w/o fallback policy.
 220                 */
 221                if (!nrs->nrs_policy_fallback)
 222                        return -EPERM;
 223
 224                if (policy->pol_state == NRS_POL_STATE_STARTED)
 225                        return 0;
 226        }
 227
 228        /**
 229         * Increase the module usage count for policies registering from other
 230         * modules.
 231         */
 232        if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
 233            !try_module_get(policy->pol_desc->pd_owner)) {
 234                atomic_dec(&policy->pol_desc->pd_refs);
 235                CERROR("NRS: cannot get module for policy %s; is it alive?\n",
 236                       policy->pol_desc->pd_name);
 237                return -ENODEV;
 238        }
 239
 240        /**
 241         * Serialize policy starting across the NRS head
 242         */
 243        nrs->nrs_policy_starting = 1;
 244
 245        policy->pol_state = NRS_POL_STATE_STARTING;
 246
 247        if (policy->pol_desc->pd_ops->op_policy_start) {
 248                spin_unlock(&nrs->nrs_lock);
 249
 250                rc = policy->pol_desc->pd_ops->op_policy_start(policy);
 251
 252                spin_lock(&nrs->nrs_lock);
 253                if (rc != 0) {
 254                        if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
 255                                module_put(policy->pol_desc->pd_owner);
 256
 257                        policy->pol_state = NRS_POL_STATE_STOPPED;
 258                        goto out;
 259                }
 260        }
 261
 262        policy->pol_state = NRS_POL_STATE_STARTED;
 263
 264        if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
 265                /**
 266                 * This path is only used at PTLRPC service setup time.
 267                 */
 268                nrs->nrs_policy_fallback = policy;
 269        } else {
 270                /*
 271                 * Try to stop the current primary policy if there is one.
 272                 */
 273                nrs_policy_stop_primary(nrs);
 274
 275                /**
 276                 * And set the newly-started policy as the primary one.
 277                 */
 278                nrs->nrs_policy_primary = policy;
 279        }
 280
 281out:
 282        nrs->nrs_policy_starting = 0;
 283
 284        return rc;
 285}
 286
 287/**
 288 * Increases the policy's usage reference count.
 289 */
 290static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
 291{
 292        policy->pol_ref++;
 293}
 294
 295/**
 296 * Decreases the policy's usage reference count, and stops the policy in case it
 297 * was already stopping and have no more outstanding usage references (which
 298 * indicates it has no more queued or started requests, and can be safely
 299 * stopped).
 300 */
 301static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
 302{
 303        LASSERT(policy->pol_ref > 0);
 304
 305        policy->pol_ref--;
 306        if (unlikely(policy->pol_ref == 0 &&
 307                     policy->pol_state == NRS_POL_STATE_STOPPING))
 308                nrs_policy_stop0(policy);
 309}
 310
 311static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
 312{
 313        spin_lock(&policy->pol_nrs->nrs_lock);
 314        nrs_policy_put_locked(policy);
 315        spin_unlock(&policy->pol_nrs->nrs_lock);
 316}
 317
 318/**
 319 * Find and return a policy by name.
 320 */
 321static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
 322                                                        char *name)
 323{
 324        struct ptlrpc_nrs_policy *tmp;
 325
 326        list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
 327                if (strncmp(tmp->pol_desc->pd_name, name,
 328                            NRS_POL_NAME_MAX) == 0) {
 329                        nrs_policy_get_locked(tmp);
 330                        return tmp;
 331                }
 332        }
 333        return NULL;
 334}
 335
 336/**
 337 * Release references for the resource hierarchy moving upwards towards the
 338 * policy instance resource.
 339 */
 340static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
 341{
 342        struct ptlrpc_nrs_policy *policy = res->res_policy;
 343
 344        if (policy->pol_desc->pd_ops->op_res_put) {
 345                struct ptlrpc_nrs_resource *parent;
 346
 347                for (; res; res = parent) {
 348                        parent = res->res_parent;
 349                        policy->pol_desc->pd_ops->op_res_put(policy, res);
 350                }
 351        }
 352}
 353
 354/**
 355 * Obtains references for each resource in the resource hierarchy for request
 356 * \a nrq if it is to be handled by \a policy.
 357 *
 358 * \param[in] policy      the policy
 359 * \param[in] nrq         the request
 360 * \param[in] moving_req  denotes whether this is a call to the function by
 361 *                        ldlm_lock_reorder_req(), in order to move \a nrq to
 362 *                        the high-priority NRS head; we should not sleep when
 363 *                        set.
 364 *
 365 * \retval NULL           resource hierarchy references not obtained
 366 * \retval valid-pointer  the bottom level of the resource hierarchy
 367 *
 368 * \see ptlrpc_nrs_pol_ops::op_res_get()
 369 */
 370static
 371struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
 372                                             struct ptlrpc_nrs_request *nrq,
 373                                             bool moving_req)
 374{
 375        /**
 376         * Set to NULL to traverse the resource hierarchy from the top.
 377         */
 378        struct ptlrpc_nrs_resource *res = NULL;
 379        struct ptlrpc_nrs_resource *tmp = NULL;
 380        int rc;
 381
 382        while (1) {
 383                rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
 384                                                          &tmp, moving_req);
 385                if (rc < 0) {
 386                        if (res)
 387                                nrs_resource_put(res);
 388                        return NULL;
 389                }
 390
 391                tmp->res_parent = res;
 392                tmp->res_policy = policy;
 393                res = tmp;
 394                tmp = NULL;
 395                /**
 396                 * Return once we have obtained a reference to the bottom level
 397                 * of the resource hierarchy.
 398                 */
 399                if (rc > 0)
 400                        return res;
 401        }
 402}
 403
 404/**
 405 * Obtains resources for the resource hierarchies and policy references for
 406 * the fallback and current primary policy (if any), that will later be used
 407 * to handle request \a nrq.
 408 *
 409 * \param[in]  nrs  the NRS head instance that will be handling request \a nrq.
 410 * \param[in]  nrq  the request that is being handled.
 411 * \param[out] resp the array where references to the resource hierarchy are
 412 *                  stored.
 413 * \param[in]  moving_req  is set when obtaining resources while moving a
 414 *                         request from a policy on the regular NRS head to a
 415 *                         policy on the HP NRS head (via
 416 *                         ldlm_lock_reorder_req()). It signifies that
 417 *                         allocations to get resources should be atomic; for
 418 *                         a full explanation, see comment in
 419 *                         ptlrpc_nrs_pol_ops::op_res_get().
 420 */
 421static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
 422                                  struct ptlrpc_nrs_request *nrq,
 423                                  struct ptlrpc_nrs_resource **resp,
 424                                  bool moving_req)
 425{
 426        struct ptlrpc_nrs_policy *primary = NULL;
 427        struct ptlrpc_nrs_policy *fallback = NULL;
 428
 429        memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
 430
 431        /**
 432         * Obtain policy references.
 433         */
 434        spin_lock(&nrs->nrs_lock);
 435
 436        fallback = nrs->nrs_policy_fallback;
 437        nrs_policy_get_locked(fallback);
 438
 439        primary = nrs->nrs_policy_primary;
 440        if (primary)
 441                nrs_policy_get_locked(primary);
 442
 443        spin_unlock(&nrs->nrs_lock);
 444
 445        /**
 446         * Obtain resource hierarchy references.
 447         */
 448        resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
 449        LASSERT(resp[NRS_RES_FALLBACK]);
 450
 451        if (primary) {
 452                resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
 453                                                         moving_req);
 454                /**
 455                 * A primary policy may exist which may not wish to serve a
 456                 * particular request for different reasons; release the
 457                 * reference on the policy as it will not be used for this
 458                 * request.
 459                 */
 460                if (!resp[NRS_RES_PRIMARY])
 461                        nrs_policy_put(primary);
 462        }
 463}
 464
 465/**
 466 * Releases references to resource hierarchies and policies, because they are no
 467 * longer required; used when request handling has been completed, or the
 468 * request is moving to the high priority NRS head.
 469 *
 470 * \param resp  the resource hierarchy that is being released
 471 *
 472 * \see ptlrpc_nrs_req_finalize()
 473 */
 474static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
 475{
 476        struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
 477        int i;
 478
 479        for (i = 0; i < NRS_RES_MAX; i++) {
 480                if (resp[i]) {
 481                        pols[i] = resp[i]->res_policy;
 482                        nrs_resource_put(resp[i]);
 483                        resp[i] = NULL;
 484                } else {
 485                        pols[i] = NULL;
 486                }
 487        }
 488
 489        for (i = 0; i < NRS_RES_MAX; i++) {
 490                if (pols[i])
 491                        nrs_policy_put(pols[i]);
 492        }
 493}
 494
 495/**
 496 * Obtains an NRS request from \a policy for handling or examination; the
 497 * request should be removed in the 'handling' case.
 498 *
 499 * Calling into this function implies we already know the policy has a request
 500 * waiting to be handled.
 501 *
 502 * \param[in] policy the policy from which a request
 503 * \param[in] peek   when set, signifies that we just want to examine the
 504 *                   request, and not handle it, so the request is not removed
 505 *                   from the policy.
 506 * \param[in] force  when set, it will force a policy to return a request if it
 507 *                   has one pending
 508 *
 509 * \retval the NRS request to be handled
 510 */
 511static inline
 512struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
 513                                           bool peek, bool force)
 514{
 515        struct ptlrpc_nrs_request *nrq;
 516
 517        LASSERT(policy->pol_req_queued > 0);
 518
 519        nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
 520
 521        LASSERT(ergo(nrq, nrs_request_policy(nrq) == policy));
 522
 523        return nrq;
 524}
 525
 526/**
 527 * Enqueues request \a nrq for later handling, via one one the policies for
 528 * which resources where earlier obtained via nrs_resource_get_safe(). The
 529 * function attempts to enqueue the request first on the primary policy
 530 * (if any), since this is the preferred choice.
 531 *
 532 * \param nrq the request being enqueued
 533 *
 534 * \see nrs_resource_get_safe()
 535 */
 536static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
 537{
 538        struct ptlrpc_nrs_policy *policy;
 539        int rc;
 540        int i;
 541
 542        /**
 543         * Try in descending order, because the primary policy (if any) is
 544         * the preferred choice.
 545         */
 546        for (i = NRS_RES_MAX - 1; i >= 0; i--) {
 547                if (!nrq->nr_res_ptrs[i])
 548                        continue;
 549
 550                nrq->nr_res_idx = i;
 551                policy = nrq->nr_res_ptrs[i]->res_policy;
 552
 553                rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
 554                if (rc == 0) {
 555                        policy->pol_nrs->nrs_req_queued++;
 556                        policy->pol_req_queued++;
 557                        return;
 558                }
 559        }
 560        /**
 561         * Should never get here, as at least the primary policy's
 562         * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
 563         * succeed.
 564         */
 565        LBUG();
 566}
 567
 568/**
 569 * Called when a request has been handled
 570 *
 571 * \param[in] nrs the request that has been handled; can be used for
 572 *                job/resource control.
 573 *
 574 * \see ptlrpc_nrs_req_stop_nolock()
 575 */
 576static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
 577{
 578        struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
 579
 580        if (policy->pol_desc->pd_ops->op_req_stop)
 581                policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
 582
 583        LASSERT(policy->pol_nrs->nrs_req_started > 0);
 584        LASSERT(policy->pol_req_started > 0);
 585
 586        policy->pol_nrs->nrs_req_started--;
 587        policy->pol_req_started--;
 588}
 589
 590/**
 591 * Handler for operations that can be carried out on policies.
 592 *
 593 * Handles opcodes that are common to all policy types within NRS core, and
 594 * passes any unknown opcodes to the policy-specific control function.
 595 *
 596 * \param[in]     nrs  the NRS head this policy belongs to.
 597 * \param[in]     name the human-readable policy name; should be the same as
 598 *                     ptlrpc_nrs_pol_desc::pd_name.
 599 * \param[in]     opc  the opcode of the operation being carried out.
 600 * \param[in,out] arg  can be used to pass information in and out between when
 601 *                     carrying an operation; usually data that is private to
 602 *                     the policy at some level, or generic policy status
 603 *                     information.
 604 *
 605 * \retval -ve error condition
 606 * \retval   0 operation was carried out successfully
 607 */
 608static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
 609                          enum ptlrpc_nrs_ctl opc, void *arg)
 610{
 611        struct ptlrpc_nrs_policy *policy;
 612        int rc = 0;
 613
 614        spin_lock(&nrs->nrs_lock);
 615
 616        policy = nrs_policy_find_locked(nrs, name);
 617        if (!policy) {
 618                rc = -ENOENT;
 619                goto out;
 620        }
 621
 622        switch (opc) {
 623                /**
 624                 * Unknown opcode, pass it down to the policy-specific control
 625                 * function for handling.
 626                 */
 627        default:
 628                rc = nrs_policy_ctl_locked(policy, opc, arg);
 629                break;
 630
 631                /**
 632                 * Start \e policy
 633                 */
 634        case PTLRPC_NRS_CTL_START:
 635                rc = nrs_policy_start_locked(policy);
 636                break;
 637        }
 638out:
 639        if (policy)
 640                nrs_policy_put_locked(policy);
 641
 642        spin_unlock(&nrs->nrs_lock);
 643
 644        return rc;
 645}
 646
 647/**
 648 * Unregisters a policy by name.
 649 *
 650 * \param[in] nrs  the NRS head this policy belongs to.
 651 * \param[in] name the human-readable policy name; should be the same as
 652 *                 ptlrpc_nrs_pol_desc::pd_name
 653 *
 654 * \retval -ve error
 655 * \retval   0 success
 656 */
 657static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
 658{
 659        struct ptlrpc_nrs_policy *policy = NULL;
 660
 661        spin_lock(&nrs->nrs_lock);
 662
 663        policy = nrs_policy_find_locked(nrs, name);
 664        if (!policy) {
 665                spin_unlock(&nrs->nrs_lock);
 666
 667                CERROR("Can't find NRS policy %s\n", name);
 668                return -ENOENT;
 669        }
 670
 671        if (policy->pol_ref > 1) {
 672                CERROR("Policy %s is busy with %d references\n", name,
 673                       (int)policy->pol_ref);
 674                nrs_policy_put_locked(policy);
 675
 676                spin_unlock(&nrs->nrs_lock);
 677                return -EBUSY;
 678        }
 679
 680        LASSERT(policy->pol_req_queued == 0);
 681        LASSERT(policy->pol_req_started == 0);
 682
 683        if (policy->pol_state != NRS_POL_STATE_STOPPED) {
 684                nrs_policy_stop_locked(policy);
 685                LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
 686        }
 687
 688        list_del(&policy->pol_list);
 689        nrs->nrs_num_pols--;
 690
 691        nrs_policy_put_locked(policy);
 692
 693        spin_unlock(&nrs->nrs_lock);
 694
 695        nrs_policy_fini(policy);
 696
 697        LASSERT(!policy->pol_private);
 698        kfree(policy);
 699
 700        return 0;
 701}
 702
 703/**
 704 * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
 705 *
 706 * \param[in] nrs   the NRS head on which the policy will be registered.
 707 * \param[in] desc  the policy descriptor from which the information will be
 708 *                  obtained to register the policy.
 709 *
 710 * \retval -ve error
 711 * \retval   0 success
 712 */
 713static int nrs_policy_register(struct ptlrpc_nrs *nrs,
 714                               struct ptlrpc_nrs_pol_desc *desc)
 715{
 716        struct ptlrpc_nrs_policy *policy;
 717        struct ptlrpc_nrs_policy *tmp;
 718        struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
 719        int rc;
 720
 721        LASSERT(desc->pd_ops->op_res_get);
 722        LASSERT(desc->pd_ops->op_req_get);
 723        LASSERT(desc->pd_ops->op_req_enqueue);
 724        LASSERT(desc->pd_ops->op_req_dequeue);
 725        LASSERT(desc->pd_compat);
 726
 727        policy = kzalloc_node(sizeof(*policy), GFP_NOFS,
 728                        cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
 729                                            svcpt->scp_cpt));
 730        if (!policy)
 731                return -ENOMEM;
 732
 733        policy->pol_nrs = nrs;
 734        policy->pol_desc = desc;
 735        policy->pol_state = NRS_POL_STATE_STOPPED;
 736        policy->pol_flags = desc->pd_flags;
 737
 738        INIT_LIST_HEAD(&policy->pol_list);
 739        INIT_LIST_HEAD(&policy->pol_list_queued);
 740
 741        rc = nrs_policy_init(policy);
 742        if (rc != 0) {
 743                kfree(policy);
 744                return rc;
 745        }
 746
 747        spin_lock(&nrs->nrs_lock);
 748
 749        tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
 750        if (tmp) {
 751                CERROR("NRS policy %s has been registered, can't register it for %s\n",
 752                       policy->pol_desc->pd_name,
 753                       svcpt->scp_service->srv_name);
 754                nrs_policy_put_locked(tmp);
 755
 756                spin_unlock(&nrs->nrs_lock);
 757                nrs_policy_fini(policy);
 758                kfree(policy);
 759
 760                return -EEXIST;
 761        }
 762
 763        list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
 764        nrs->nrs_num_pols++;
 765
 766        if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
 767                rc = nrs_policy_start_locked(policy);
 768
 769        spin_unlock(&nrs->nrs_lock);
 770
 771        if (rc != 0)
 772                (void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
 773
 774        return rc;
 775}
 776
 777/**
 778 * Enqueue request \a req using one of the policies its resources are referring
 779 * to.
 780 *
 781 * \param[in] req the request to enqueue.
 782 */
 783static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
 784{
 785        struct ptlrpc_nrs_policy *policy;
 786
 787        LASSERT(req->rq_nrq.nr_initialized);
 788        LASSERT(!req->rq_nrq.nr_enqueued);
 789
 790        nrs_request_enqueue(&req->rq_nrq);
 791        req->rq_nrq.nr_enqueued = 1;
 792
 793        policy = nrs_request_policy(&req->rq_nrq);
 794        /**
 795         * Add the policy to the NRS head's list of policies with enqueued
 796         * requests, if it has not been added there.
 797         */
 798        if (unlikely(list_empty(&policy->pol_list_queued)))
 799                list_add_tail(&policy->pol_list_queued,
 800                              &policy->pol_nrs->nrs_policy_queued);
 801}
 802
 803/**
 804 * Enqueue a request on the high priority NRS head.
 805 *
 806 * \param req the request to enqueue.
 807 */
 808static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
 809{
 810        int opc = lustre_msg_get_opc(req->rq_reqmsg);
 811
 812        spin_lock(&req->rq_lock);
 813        req->rq_hp = 1;
 814        ptlrpc_nrs_req_add_nolock(req);
 815        if (opc != OBD_PING)
 816                DEBUG_REQ(D_NET, req, "high priority req");
 817        spin_unlock(&req->rq_lock);
 818}
 819
 820/**
 821 * Returns a boolean predicate indicating whether the policy described by
 822 * \a desc is adequate for use with service \a svc.
 823 *
 824 * \param[in] svc  the service
 825 * \param[in] desc the policy descriptor
 826 *
 827 * \retval false the policy is not compatible with the service
 828 * \retval true  the policy is compatible with the service
 829 */
 830static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
 831                                         const struct ptlrpc_nrs_pol_desc *desc)
 832{
 833        return desc->pd_compat(svc, desc);
 834}
 835
 836/**
 837 * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
 838 * \a nrs.
 839 *
 840 * \param[in] nrs the NRS head
 841 *
 842 * \retval -ve error
 843 * \retval   0 success
 844 *
 845 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
 846 *
 847 * \see ptlrpc_service_nrs_setup()
 848 */
 849static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
 850{
 851        struct ptlrpc_nrs_pol_desc *desc;
 852        /* for convenience */
 853        struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
 854        struct ptlrpc_service *svc = svcpt->scp_service;
 855        int rc = -EINVAL;
 856
 857        LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
 858
 859        list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
 860                if (nrs_policy_compatible(svc, desc)) {
 861                        rc = nrs_policy_register(nrs, desc);
 862                        if (rc != 0) {
 863                                CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
 864                                       desc->pd_name, svcpt->scp_cpt,
 865                                       svc->srv_name, rc);
 866                                /**
 867                                 * Fail registration if any of the policies'
 868                                 * registration fails.
 869                                 */
 870                                break;
 871                        }
 872                }
 873        }
 874
 875        return rc;
 876}
 877
 878/**
 879 * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
 880 * compatible policies in NRS core, with the NRS head.
 881 *
 882 * \param[in] nrs   the NRS head
 883 * \param[in] svcpt the PTLRPC service partition to setup
 884 *
 885 * \retval -ve error
 886 * \retval   0 success
 887 *
 888 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
 889 */
 890static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
 891                                   struct ptlrpc_service_part *svcpt)
 892{
 893        enum ptlrpc_nrs_queue_type queue;
 894
 895        LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
 896
 897        if (nrs == &svcpt->scp_nrs_reg)
 898                queue = PTLRPC_NRS_QUEUE_REG;
 899        else if (nrs == svcpt->scp_nrs_hp)
 900                queue = PTLRPC_NRS_QUEUE_HP;
 901        else
 902                LBUG();
 903
 904        nrs->nrs_svcpt = svcpt;
 905        nrs->nrs_queue_type = queue;
 906        spin_lock_init(&nrs->nrs_lock);
 907        INIT_LIST_HEAD(&nrs->nrs_policy_list);
 908        INIT_LIST_HEAD(&nrs->nrs_policy_queued);
 909
 910        return nrs_register_policies_locked(nrs);
 911}
 912
 913/**
 914 * Allocates a regular and optionally a high-priority NRS head (if the service
 915 * handles high-priority RPCs), and then registers all available compatible
 916 * policies on those NRS heads.
 917 *
 918 * \param[in,out] svcpt the PTLRPC service partition to setup
 919 *
 920 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
 921 */
 922static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
 923{
 924        struct ptlrpc_nrs *nrs;
 925        int rc;
 926
 927        LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
 928
 929        /**
 930         * Initialize the regular NRS head.
 931         */
 932        nrs = nrs_svcpt2nrs(svcpt, false);
 933        rc = nrs_svcpt_setup_locked0(nrs, svcpt);
 934        if (rc < 0)
 935                goto out;
 936
 937        /**
 938         * Optionally allocate a high-priority NRS head.
 939         */
 940        if (!svcpt->scp_service->srv_ops.so_hpreq_handler)
 941                goto out;
 942
 943        svcpt->scp_nrs_hp =
 944                kzalloc_node(sizeof(*svcpt->scp_nrs_hp), GFP_NOFS,
 945                        cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
 946                                            svcpt->scp_cpt));
 947        if (!svcpt->scp_nrs_hp) {
 948                rc = -ENOMEM;
 949                goto out;
 950        }
 951
 952        nrs = nrs_svcpt2nrs(svcpt, true);
 953        rc = nrs_svcpt_setup_locked0(nrs, svcpt);
 954
 955out:
 956        return rc;
 957}
 958
 959/**
 960 * Unregisters all policies on all available NRS heads in a service partition;
 961 * called at PTLRPC service unregistration time.
 962 *
 963 * \param[in] svcpt the PTLRPC service partition
 964 *
 965 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
 966 */
 967static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
 968{
 969        struct ptlrpc_nrs *nrs;
 970        struct ptlrpc_nrs_policy *policy;
 971        struct ptlrpc_nrs_policy *tmp;
 972        int rc;
 973        bool hp = false;
 974
 975        LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
 976
 977again:
 978        /* scp_nrs_hp could be NULL due to short of memory. */
 979        nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
 980        /* check the nrs_svcpt to see if nrs is initialized. */
 981        if (!nrs || !nrs->nrs_svcpt)
 982                return;
 983        nrs->nrs_stopping = 1;
 984
 985        list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) {
 986                rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
 987                LASSERT(rc == 0);
 988        }
 989
 990        /**
 991         * If the service partition has an HP NRS head, clean that up as well.
 992         */
 993        if (!hp && nrs_svcpt_has_hp(svcpt)) {
 994                hp = true;
 995                goto again;
 996        }
 997
 998        if (hp)
 999                kfree(nrs);
1000}
1001
1002/**
1003 * Returns the descriptor for a policy as identified by by \a name.
1004 *
1005 * \param[in] name the policy name
1006 *
1007 * \retval the policy descriptor
1008 * \retval NULL
1009 */
1010static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
1011{
1012        struct ptlrpc_nrs_pol_desc *tmp;
1013
1014        list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
1015                if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
1016                        return tmp;
1017        }
1018        return NULL;
1019}
1020
1021/**
1022 * Removes the policy from all supported NRS heads of all partitions of all
1023 * PTLRPC services.
1024 *
1025 * \param[in] desc the policy descriptor to unregister
1026 *
1027 * \retval -ve error
1028 * \retval  0  successfully unregistered policy on all supported NRS heads
1029 *
1030 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
1031 * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
1032 */
1033static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
1034{
1035        struct ptlrpc_nrs *nrs;
1036        struct ptlrpc_service *svc;
1037        struct ptlrpc_service_part *svcpt;
1038        int i;
1039        int rc = 0;
1040
1041        LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
1042        LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
1043
1044        list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1045                if (!nrs_policy_compatible(svc, desc) ||
1046                    unlikely(svc->srv_is_stopping))
1047                        continue;
1048
1049                ptlrpc_service_for_each_part(svcpt, i, svc) {
1050                        bool hp = false;
1051
1052again:
1053                        nrs = nrs_svcpt2nrs(svcpt, hp);
1054                        rc = nrs_policy_unregister(nrs, desc->pd_name);
1055                        /**
1056                         * Ignore -ENOENT as the policy may not have registered
1057                         * successfully on all service partitions.
1058                         */
1059                        if (rc == -ENOENT) {
1060                                rc = 0;
1061                        } else if (rc != 0) {
1062                                CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
1063                                       desc->pd_name, svcpt->scp_cpt,
1064                                       svcpt->scp_service->srv_name, rc);
1065                                return rc;
1066                        }
1067
1068                        if (!hp && nrs_svc_has_hp(svc)) {
1069                                hp = true;
1070                                goto again;
1071                        }
1072                }
1073
1074                if (desc->pd_ops->op_lprocfs_fini)
1075                        desc->pd_ops->op_lprocfs_fini(svc);
1076        }
1077
1078        return rc;
1079}
1080
1081/**
1082 * Registers a new policy with NRS core.
1083 *
1084 * The function will only succeed if policy registration with all compatible
1085 * service partitions (if any) is successful.
1086 *
1087 * N.B. This function should be called either at ptlrpc module initialization
1088 *      time when registering a policy that ships with NRS core, or in a
1089 *      module's init() function for policies registering from other modules.
1090 *
1091 * \param[in] conf configuration information for the new policy to register
1092 *
1093 * \retval -ve error
1094 * \retval   0 success
1095 */
1096static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
1097{
1098        struct ptlrpc_service *svc;
1099        struct ptlrpc_nrs_pol_desc *desc;
1100        size_t len;
1101        int rc = 0;
1102
1103        LASSERT(conf->nc_ops);
1104        LASSERT(conf->nc_compat);
1105        LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
1106                     conf->nc_compat_svc_name));
1107        LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
1108                     conf->nc_owner));
1109
1110        conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1111
1112        /**
1113         * External policies are not allowed to start immediately upon
1114         * registration, as there is a relatively higher chance that their
1115         * registration might fail. In such a case, some policy instances may
1116         * already have requests queued wen unregistration needs to happen as
1117         * part o cleanup; since there is currently no way to drain requests
1118         * from a policy unless the service is unregistering, we just disallow
1119         * this.
1120         */
1121        if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
1122            (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
1123                               PTLRPC_NRS_FL_REG_START))) {
1124                CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
1125                       conf->nc_name);
1126                return -EINVAL;
1127        }
1128
1129        mutex_lock(&nrs_core.nrs_mutex);
1130
1131        if (nrs_policy_find_desc_locked(conf->nc_name)) {
1132                CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
1133                       conf->nc_name);
1134                rc = -EEXIST;
1135                goto fail;
1136        }
1137
1138        desc = kzalloc(sizeof(*desc), GFP_NOFS);
1139        if (!desc) {
1140                rc = -ENOMEM;
1141                goto fail;
1142        }
1143
1144        len = strlcpy(desc->pd_name, conf->nc_name, sizeof(desc->pd_name));
1145        if (len >= sizeof(desc->pd_name)) {
1146                kfree(desc);
1147                rc = -E2BIG;
1148                goto fail;
1149        }
1150        desc->pd_ops = conf->nc_ops;
1151        desc->pd_compat = conf->nc_compat;
1152        desc->pd_compat_svc_name = conf->nc_compat_svc_name;
1153        if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
1154                desc->pd_owner = conf->nc_owner;
1155        desc->pd_flags = conf->nc_flags;
1156        atomic_set(&desc->pd_refs, 0);
1157
1158        /**
1159         * For policies that are held in the same module as NRS (currently
1160         * ptlrpc), do not register the policy with all compatible services,
1161         * as the services will not have started at this point, since we are
1162         * calling from ptlrpc module initialization code. In such cases each
1163         * service will register all compatible policies later, via
1164         * ptlrpc_service_nrs_setup().
1165         */
1166        if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
1167                goto internal;
1168
1169        /**
1170         * Register the new policy on all compatible services
1171         */
1172        mutex_lock(&ptlrpc_all_services_mutex);
1173
1174        list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1175                struct ptlrpc_service_part *svcpt;
1176                int i;
1177                int rc2;
1178
1179                if (!nrs_policy_compatible(svc, desc) ||
1180                    unlikely(svc->srv_is_stopping))
1181                        continue;
1182
1183                ptlrpc_service_for_each_part(svcpt, i, svc) {
1184                        struct ptlrpc_nrs *nrs;
1185                        bool hp = false;
1186again:
1187                        nrs = nrs_svcpt2nrs(svcpt, hp);
1188                        rc = nrs_policy_register(nrs, desc);
1189                        if (rc != 0) {
1190                                CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
1191                                       desc->pd_name, svcpt->scp_cpt,
1192                                       svcpt->scp_service->srv_name, rc);
1193
1194                                rc2 = nrs_policy_unregister_locked(desc);
1195                                /**
1196                                 * Should not fail at this point
1197                                 */
1198                                LASSERT(rc2 == 0);
1199                                mutex_unlock(&ptlrpc_all_services_mutex);
1200                                kfree(desc);
1201                                goto fail;
1202                        }
1203
1204                        if (!hp && nrs_svc_has_hp(svc)) {
1205                                hp = true;
1206                                goto again;
1207                        }
1208                }
1209
1210                /**
1211                 * No need to take a reference to other modules here, as we
1212                 * will be calling from the module's init() function.
1213                 */
1214                if (desc->pd_ops->op_lprocfs_init) {
1215                        rc = desc->pd_ops->op_lprocfs_init(svc);
1216                        if (rc != 0) {
1217                                rc2 = nrs_policy_unregister_locked(desc);
1218                                /**
1219                                 * Should not fail at this point
1220                                 */
1221                                LASSERT(rc2 == 0);
1222                                mutex_unlock(&ptlrpc_all_services_mutex);
1223                                kfree(desc);
1224                                goto fail;
1225                        }
1226                }
1227        }
1228
1229        mutex_unlock(&ptlrpc_all_services_mutex);
1230internal:
1231        list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
1232fail:
1233        mutex_unlock(&nrs_core.nrs_mutex);
1234
1235        return rc;
1236}
1237
1238/**
1239 * Setup NRS heads on all service partitions of service \a svc, and register
1240 * all compatible policies on those NRS heads.
1241 *
1242 * To be called from within ptl
1243 * \param[in] svc the service to setup
1244 *
1245 * \retval -ve error, the calling logic should eventually call
1246 *                    ptlrpc_service_nrs_cleanup() to undo any work performed
1247 *                    by this function.
1248 *
1249 * \see ptlrpc_register_service()
1250 * \see ptlrpc_service_nrs_cleanup()
1251 */
1252int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
1253{
1254        struct ptlrpc_service_part *svcpt;
1255        const struct ptlrpc_nrs_pol_desc *desc;
1256        int i;
1257        int rc = 0;
1258
1259        mutex_lock(&nrs_core.nrs_mutex);
1260
1261        /**
1262         * Initialize NRS heads on all service CPTs.
1263         */
1264        ptlrpc_service_for_each_part(svcpt, i, svc) {
1265                rc = nrs_svcpt_setup_locked(svcpt);
1266                if (rc != 0)
1267                        goto failed;
1268        }
1269
1270        /**
1271         * Set up lprocfs interfaces for all supported policies for the
1272         * service.
1273         */
1274        list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1275                if (!nrs_policy_compatible(svc, desc))
1276                        continue;
1277
1278                if (desc->pd_ops->op_lprocfs_init) {
1279                        rc = desc->pd_ops->op_lprocfs_init(svc);
1280                        if (rc != 0)
1281                                goto failed;
1282                }
1283        }
1284
1285failed:
1286
1287        mutex_unlock(&nrs_core.nrs_mutex);
1288
1289        return rc;
1290}
1291
1292/**
1293 * Unregisters all policies on all service partitions of service \a svc.
1294 *
1295 * \param[in] svc the PTLRPC service to unregister
1296 */
1297void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
1298{
1299        struct ptlrpc_service_part *svcpt;
1300        const struct ptlrpc_nrs_pol_desc *desc;
1301        int i;
1302
1303        mutex_lock(&nrs_core.nrs_mutex);
1304
1305        /**
1306         * Clean up NRS heads on all service partitions
1307         */
1308        ptlrpc_service_for_each_part(svcpt, i, svc)
1309                nrs_svcpt_cleanup_locked(svcpt);
1310
1311        /**
1312         * Clean up lprocfs interfaces for all supported policies for the
1313         * service.
1314         */
1315        list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1316                if (!nrs_policy_compatible(svc, desc))
1317                        continue;
1318
1319                if (desc->pd_ops->op_lprocfs_fini)
1320                        desc->pd_ops->op_lprocfs_fini(svc);
1321        }
1322
1323        mutex_unlock(&nrs_core.nrs_mutex);
1324}
1325
1326/**
1327 * Obtains NRS head resources for request \a req.
1328 *
1329 * These could be either on the regular or HP NRS head of \a svcpt; resources
1330 * taken on the regular head can later be swapped for HP head resources by
1331 * ldlm_lock_reorder_req().
1332 *
1333 * \param[in] svcpt the service partition
1334 * \param[in] req   the request
1335 * \param[in] hp    which NRS head of \a svcpt to use
1336 */
1337void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
1338                               struct ptlrpc_request *req, bool hp)
1339{
1340        struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1341
1342        memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
1343        nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
1344                              false);
1345
1346        /**
1347         * It is fine to access \e nr_initialized without locking as there is
1348         * no contention at this early stage.
1349         */
1350        req->rq_nrq.nr_initialized = 1;
1351}
1352
1353/**
1354 * Releases resources for a request; is called after the request has been
1355 * handled.
1356 *
1357 * \param[in] req the request
1358 *
1359 * \see ptlrpc_server_finish_request()
1360 */
1361void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
1362{
1363        if (req->rq_nrq.nr_initialized) {
1364                nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
1365                /* no protection on bit nr_initialized because no
1366                 * contention at this late stage
1367                 */
1368                req->rq_nrq.nr_finalized = 1;
1369        }
1370}
1371
1372void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
1373{
1374        if (req->rq_nrq.nr_started)
1375                nrs_request_stop(&req->rq_nrq);
1376}
1377
1378/**
1379 * Enqueues request \a req on either the regular or high-priority NRS head
1380 * of service partition \a svcpt.
1381 *
1382 * \param[in] svcpt the service partition
1383 * \param[in] req   the request to be enqueued
1384 * \param[in] hp    whether to enqueue the request on the regular or
1385 *                  high-priority NRS head.
1386 */
1387void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
1388                        struct ptlrpc_request *req, bool hp)
1389{
1390        spin_lock(&svcpt->scp_req_lock);
1391
1392        if (hp)
1393                ptlrpc_nrs_hpreq_add_nolock(req);
1394        else
1395                ptlrpc_nrs_req_add_nolock(req);
1396
1397        spin_unlock(&svcpt->scp_req_lock);
1398}
1399
1400static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
1401{
1402        LASSERT(policy->pol_nrs->nrs_req_queued > 0);
1403        LASSERT(policy->pol_req_queued > 0);
1404
1405        policy->pol_nrs->nrs_req_queued--;
1406        policy->pol_req_queued--;
1407
1408        /**
1409         * If the policy has no more requests queued, remove it from
1410         * ptlrpc_nrs::nrs_policy_queued.
1411         */
1412        if (unlikely(policy->pol_req_queued == 0)) {
1413                list_del_init(&policy->pol_list_queued);
1414
1415                /**
1416                 * If there are other policies with queued requests, move the
1417                 * current policy to the end so that we can round robin over
1418                 * all policies and drain the requests.
1419                 */
1420        } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
1421                LASSERT(policy->pol_req_queued <
1422                        policy->pol_nrs->nrs_req_queued);
1423
1424                list_move_tail(&policy->pol_list_queued,
1425                               &policy->pol_nrs->nrs_policy_queued);
1426        }
1427}
1428
1429/**
1430 * Obtains a request for handling from an NRS head of service partition
1431 * \a svcpt.
1432 *
1433 * \param[in] svcpt the service partition
1434 * \param[in] hp    whether to obtain a request from the regular or
1435 *                  high-priority NRS head.
1436 * \param[in] peek  when set, signifies that we just want to examine the
1437 *                  request, and not handle it, so the request is not removed
1438 *                  from the policy.
1439 * \param[in] force when set, it will force a policy to return a request if it
1440 *                  has one pending
1441 *
1442 * \retval the  request to be handled
1443 * \retval NULL the head has no requests to serve
1444 */
1445struct ptlrpc_request *
1446ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
1447                           bool peek, bool force)
1448{
1449        struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1450        struct ptlrpc_nrs_policy *policy;
1451        struct ptlrpc_nrs_request *nrq;
1452
1453        /**
1454         * Always try to drain requests from all NRS polices even if they are
1455         * inactive, because the user can change policy status at runtime.
1456         */
1457        list_for_each_entry(policy, &nrs->nrs_policy_queued, pol_list_queued) {
1458                nrq = nrs_request_get(policy, peek, force);
1459                if (nrq) {
1460                        if (likely(!peek)) {
1461                                nrq->nr_started = 1;
1462
1463                                policy->pol_req_started++;
1464                                policy->pol_nrs->nrs_req_started++;
1465
1466                                nrs_request_removed(policy);
1467                        }
1468
1469                        return container_of(nrq, struct ptlrpc_request, rq_nrq);
1470                }
1471        }
1472
1473        return NULL;
1474}
1475
1476/**
1477 * Returns whether there are any requests currently enqueued on any of the
1478 * policies of service partition's \a svcpt NRS head specified by \a hp. Should
1479 * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
1480 * result.
1481 *
1482 * \param[in] svcpt the service partition to enquire.
1483 * \param[in] hp    whether the regular or high-priority NRS head is to be
1484 *                  enquired.
1485 *
1486 * \retval false the indicated NRS head has no enqueued requests.
1487 * \retval true  the indicated NRS head has some enqueued requests.
1488 */
1489bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
1490{
1491        struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1492
1493        return nrs->nrs_req_queued > 0;
1494};
1495
1496/**
1497 * Carries out a control operation \a opc on the policy identified by the
1498 * human-readable \a name, on either all partitions, or only on the first
1499 * partition of service \a svc.
1500 *
1501 * \param[in]     svc    the service the policy belongs to.
1502 * \param[in]     queue  whether to carry out the command on the policy which
1503 *                       belongs to the regular, high-priority, or both NRS
1504 *                       heads of service partitions of \a svc.
1505 * \param[in]     name   the policy to act upon, by human-readable name
1506 * \param[in]     opc    the opcode of the operation to carry out
1507 * \param[in]     single when set, the operation will only be carried out on the
1508 *                       NRS heads of the first service partition of \a svc.
1509 *                       This is useful for some policies which e.g. share
1510 *                       identical values on the same parameters of different
1511 *                       service partitions; when reading these parameters via
1512 *                       lprocfs, these policies may just want to obtain and
1513 *                       print out the values from the first service partition.
1514 *                       Storing these values centrally elsewhere then could be
1515 *                       another solution for this.
1516 * \param[in,out] arg    can be used as a generic in/out buffer between control
1517 *                       operations and the user environment.
1518 *
1519 *\retval -ve error condition
1520 *\retval   0 operation was carried out successfully
1521 */
1522int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
1523                              enum ptlrpc_nrs_queue_type queue, char *name,
1524                              enum ptlrpc_nrs_ctl opc, bool single, void *arg)
1525{
1526        struct ptlrpc_service_part *svcpt;
1527        int i;
1528        int rc = 0;
1529
1530        LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
1531
1532        if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
1533                return -EINVAL;
1534
1535        ptlrpc_service_for_each_part(svcpt, i, svc) {
1536                if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
1537                        rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
1538                                            opc, arg);
1539                        if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
1540                                        single))
1541                                goto out;
1542                }
1543
1544                if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
1545                        /**
1546                         * XXX: We could optionally check for
1547                         * nrs_svc_has_hp(svc) here, and return an error if it
1548                         * is false. Right now we rely on the policies' lprocfs
1549                         * handlers that call the present function to make this
1550                         * check; if they fail to do so, they might hit the
1551                         * assertion inside nrs_svcpt2nrs() below.
1552                         */
1553                        rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
1554                                            opc, arg);
1555                        if (rc != 0 || single)
1556                                goto out;
1557                }
1558        }
1559out:
1560        return rc;
1561}
1562
1563/* ptlrpc/nrs_fifo.c */
1564extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
1565
1566/**
1567 * Adds all policies that ship with the ptlrpc module, to NRS core's list of
1568 * policies \e nrs_core.nrs_policies.
1569 *
1570 * \retval 0 all policies have been registered successfully
1571 * \retval -ve error
1572 */
1573int ptlrpc_nrs_init(void)
1574{
1575        int rc;
1576
1577        mutex_init(&nrs_core.nrs_mutex);
1578        INIT_LIST_HEAD(&nrs_core.nrs_policies);
1579
1580        rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
1581        if (rc != 0)
1582                goto fail;
1583
1584        return rc;
1585fail:
1586        /**
1587         * Since no PTLRPC services have been started at this point, all we need
1588         * to do for cleanup is to free the descriptors.
1589         */
1590        ptlrpc_nrs_fini();
1591
1592        return rc;
1593}
1594
1595/**
1596 * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
1597 * policy descriptors.
1598 *
1599 * Since all PTLRPC services are stopped at this point, there are no more
1600 * instances of any policies, because each service will have stopped its policy
1601 * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
1602 * descriptors here.
1603 */
1604void ptlrpc_nrs_fini(void)
1605{
1606        struct ptlrpc_nrs_pol_desc *desc;
1607        struct ptlrpc_nrs_pol_desc *tmp;
1608
1609        list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies, pd_list) {
1610                list_del_init(&desc->pd_list);
1611                kfree(desc);
1612        }
1613}
1614
1615/** @} nrs */
1616