linux/kernel/rcu/rcu_segcblist.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * RCU segmented callback lists, function definitions
   4 *
   5 * Copyright IBM Corporation, 2017
   6 *
   7 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
   8 */
   9
  10#include <linux/types.h>
  11#include <linux/kernel.h>
  12#include <linux/interrupt.h>
  13#include <linux/rcupdate.h>
  14
  15#include "rcu_segcblist.h"
  16
  17/* Initialize simple callback list. */
  18void rcu_cblist_init(struct rcu_cblist *rclp)
  19{
  20        rclp->head = NULL;
  21        rclp->tail = &rclp->head;
  22        rclp->len = 0;
  23        rclp->len_lazy = 0;
  24}
  25
  26/*
  27 * Enqueue an rcu_head structure onto the specified callback list.
  28 * This function assumes that the callback is non-lazy because it
  29 * is intended for use by no-CBs CPUs, which do not distinguish
  30 * between lazy and non-lazy RCU callbacks.
  31 */
  32void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp)
  33{
  34        *rclp->tail = rhp;
  35        rclp->tail = &rhp->next;
  36        WRITE_ONCE(rclp->len, rclp->len + 1);
  37}
  38
  39/*
  40 * Flush the second rcu_cblist structure onto the first one, obliterating
  41 * any contents of the first.  If rhp is non-NULL, enqueue it as the sole
  42 * element of the second rcu_cblist structure, but ensuring that the second
  43 * rcu_cblist structure, if initially non-empty, always appears non-empty
  44 * throughout the process.  If rdp is NULL, the second rcu_cblist structure
  45 * is instead initialized to empty.
  46 */
  47void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
  48                              struct rcu_cblist *srclp,
  49                              struct rcu_head *rhp)
  50{
  51        drclp->head = srclp->head;
  52        if (drclp->head)
  53                drclp->tail = srclp->tail;
  54        else
  55                drclp->tail = &drclp->head;
  56        drclp->len = srclp->len;
  57        drclp->len_lazy = srclp->len_lazy;
  58        if (!rhp) {
  59                rcu_cblist_init(srclp);
  60        } else {
  61                rhp->next = NULL;
  62                srclp->head = rhp;
  63                srclp->tail = &rhp->next;
  64                WRITE_ONCE(srclp->len, 1);
  65                srclp->len_lazy = 0;
  66        }
  67}
  68
  69/*
  70 * Dequeue the oldest rcu_head structure from the specified callback
  71 * list.  This function assumes that the callback is non-lazy, but
  72 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
  73 * finds otherwise (and if it cares about laziness).  This allows
  74 * different users to have different ways of determining laziness.
  75 */
  76struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
  77{
  78        struct rcu_head *rhp;
  79
  80        rhp = rclp->head;
  81        if (!rhp)
  82                return NULL;
  83        rclp->len--;
  84        rclp->head = rhp->next;
  85        if (!rclp->head)
  86                rclp->tail = &rclp->head;
  87        return rhp;
  88}
  89
  90/* Set the length of an rcu_segcblist structure. */
  91void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
  92{
  93#ifdef CONFIG_RCU_NOCB_CPU
  94        atomic_long_set(&rsclp->len, v);
  95#else
  96        WRITE_ONCE(rsclp->len, v);
  97#endif
  98}
  99
 100/*
 101 * Increase the numeric length of an rcu_segcblist structure by the
 102 * specified amount, which can be negative.  This can cause the ->len
 103 * field to disagree with the actual number of callbacks on the structure.
 104 * This increase is fully ordered with respect to the callers accesses
 105 * both before and after.
 106 */
 107void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
 108{
 109#ifdef CONFIG_RCU_NOCB_CPU
 110        smp_mb__before_atomic(); /* Up to the caller! */
 111        atomic_long_add(v, &rsclp->len);
 112        smp_mb__after_atomic(); /* Up to the caller! */
 113#else
 114        smp_mb(); /* Up to the caller! */
 115        WRITE_ONCE(rsclp->len, rsclp->len + v);
 116        smp_mb(); /* Up to the caller! */
 117#endif
 118}
 119
 120/*
 121 * Increase the numeric length of an rcu_segcblist structure by one.
 122 * This can cause the ->len field to disagree with the actual number of
 123 * callbacks on the structure.  This increase is fully ordered with respect
 124 * to the callers accesses both before and after.
 125 */
 126void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
 127{
 128        rcu_segcblist_add_len(rsclp, 1);
 129}
 130
 131/*
 132 * Exchange the numeric length of the specified rcu_segcblist structure
 133 * with the specified value.  This can cause the ->len field to disagree
 134 * with the actual number of callbacks on the structure.  This exchange is
 135 * fully ordered with respect to the callers accesses both before and after.
 136 */
 137long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
 138{
 139#ifdef CONFIG_RCU_NOCB_CPU
 140        return atomic_long_xchg(&rsclp->len, v);
 141#else
 142        long ret = rsclp->len;
 143
 144        smp_mb(); /* Up to the caller! */
 145        WRITE_ONCE(rsclp->len, v);
 146        smp_mb(); /* Up to the caller! */
 147        return ret;
 148#endif
 149}
 150
 151/*
 152 * Initialize an rcu_segcblist structure.
 153 */
 154void rcu_segcblist_init(struct rcu_segcblist *rsclp)
 155{
 156        int i;
 157
 158        BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
 159        BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
 160        rsclp->head = NULL;
 161        for (i = 0; i < RCU_CBLIST_NSEGS; i++)
 162                rsclp->tails[i] = &rsclp->head;
 163        rcu_segcblist_set_len(rsclp, 0);
 164        rsclp->len_lazy = 0;
 165        rsclp->enabled = 1;
 166}
 167
 168/*
 169 * Disable the specified rcu_segcblist structure, so that callbacks can
 170 * no longer be posted to it.  This structure must be empty.
 171 */
 172void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
 173{
 174        WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
 175        WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
 176        WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
 177        rsclp->enabled = 0;
 178}
 179
 180/*
 181 * Mark the specified rcu_segcblist structure as offloaded.  This
 182 * structure must be empty.
 183 */
 184void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
 185{
 186        rsclp->offloaded = 1;
 187}
 188
 189/*
 190 * Does the specified rcu_segcblist structure contain callbacks that
 191 * are ready to be invoked?
 192 */
 193bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
 194{
 195        return rcu_segcblist_is_enabled(rsclp) &&
 196               &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
 197}
 198
 199/*
 200 * Does the specified rcu_segcblist structure contain callbacks that
 201 * are still pending, that is, not yet ready to be invoked?
 202 */
 203bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
 204{
 205        return rcu_segcblist_is_enabled(rsclp) &&
 206               !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
 207}
 208
 209/*
 210 * Return a pointer to the first callback in the specified rcu_segcblist
 211 * structure.  This is useful for diagnostics.
 212 */
 213struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
 214{
 215        if (rcu_segcblist_is_enabled(rsclp))
 216                return rsclp->head;
 217        return NULL;
 218}
 219
 220/*
 221 * Return a pointer to the first pending callback in the specified
 222 * rcu_segcblist structure.  This is useful just after posting a given
 223 * callback -- if that callback is the first pending callback, then
 224 * you cannot rely on someone else having already started up the required
 225 * grace period.
 226 */
 227struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
 228{
 229        if (rcu_segcblist_is_enabled(rsclp))
 230                return *rsclp->tails[RCU_DONE_TAIL];
 231        return NULL;
 232}
 233
 234/*
 235 * Return false if there are no CBs awaiting grace periods, otherwise,
 236 * return true and store the nearest waited-upon grace period into *lp.
 237 */
 238bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp)
 239{
 240        if (!rcu_segcblist_pend_cbs(rsclp))
 241                return false;
 242        *lp = rsclp->gp_seq[RCU_WAIT_TAIL];
 243        return true;
 244}
 245
 246/*
 247 * Enqueue the specified callback onto the specified rcu_segcblist
 248 * structure, updating accounting as needed.  Note that the ->len
 249 * field may be accessed locklessly, hence the WRITE_ONCE().
 250 * The ->len field is used by rcu_barrier() and friends to determine
 251 * if it must post a callback on this structure, and it is OK
 252 * for rcu_barrier() to sometimes post callbacks needlessly, but
 253 * absolutely not OK for it to ever miss posting a callback.
 254 */
 255void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
 256                           struct rcu_head *rhp, bool lazy)
 257{
 258        rcu_segcblist_inc_len(rsclp);
 259        if (lazy)
 260                rsclp->len_lazy++;
 261        smp_mb(); /* Ensure counts are updated before callback is enqueued. */
 262        rhp->next = NULL;
 263        WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
 264        WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
 265}
 266
 267/*
 268 * Entrain the specified callback onto the specified rcu_segcblist at
 269 * the end of the last non-empty segment.  If the entire rcu_segcblist
 270 * is empty, make no change, but return false.
 271 *
 272 * This is intended for use by rcu_barrier()-like primitives, -not-
 273 * for normal grace-period use.  IMPORTANT:  The callback you enqueue
 274 * will wait for all prior callbacks, NOT necessarily for a grace
 275 * period.  You have been warned.
 276 */
 277bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
 278                           struct rcu_head *rhp, bool lazy)
 279{
 280        int i;
 281
 282        if (rcu_segcblist_n_cbs(rsclp) == 0)
 283                return false;
 284        rcu_segcblist_inc_len(rsclp);
 285        if (lazy)
 286                rsclp->len_lazy++;
 287        smp_mb(); /* Ensure counts are updated before callback is entrained. */
 288        rhp->next = NULL;
 289        for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
 290                if (rsclp->tails[i] != rsclp->tails[i - 1])
 291                        break;
 292        WRITE_ONCE(*rsclp->tails[i], rhp);
 293        for (; i <= RCU_NEXT_TAIL; i++)
 294                WRITE_ONCE(rsclp->tails[i], &rhp->next);
 295        return true;
 296}
 297
 298/*
 299 * Extract only the counts from the specified rcu_segcblist structure,
 300 * and place them in the specified rcu_cblist structure.  This function
 301 * supports both callback orphaning and invocation, hence the separation
 302 * of counts and callbacks.  (Callbacks ready for invocation must be
 303 * orphaned and adopted separately from pending callbacks, but counts
 304 * apply to all callbacks.  Locking must be used to make sure that
 305 * both orphaned-callbacks lists are consistent.)
 306 */
 307void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
 308                                               struct rcu_cblist *rclp)
 309{
 310        rclp->len_lazy += rsclp->len_lazy;
 311        rsclp->len_lazy = 0;
 312        rclp->len = rcu_segcblist_xchg_len(rsclp, 0);
 313}
 314
 315/*
 316 * Extract only those callbacks ready to be invoked from the specified
 317 * rcu_segcblist structure and place them in the specified rcu_cblist
 318 * structure.
 319 */
 320void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
 321                                    struct rcu_cblist *rclp)
 322{
 323        int i;
 324
 325        if (!rcu_segcblist_ready_cbs(rsclp))
 326                return; /* Nothing to do. */
 327        *rclp->tail = rsclp->head;
 328        WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]);
 329        WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
 330        rclp->tail = rsclp->tails[RCU_DONE_TAIL];
 331        for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
 332                if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
 333                        WRITE_ONCE(rsclp->tails[i], &rsclp->head);
 334}
 335
 336/*
 337 * Extract only those callbacks still pending (not yet ready to be
 338 * invoked) from the specified rcu_segcblist structure and place them in
 339 * the specified rcu_cblist structure.  Note that this loses information
 340 * about any callbacks that might have been partway done waiting for
 341 * their grace period.  Too bad!  They will have to start over.
 342 */
 343void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
 344                                    struct rcu_cblist *rclp)
 345{
 346        int i;
 347
 348        if (!rcu_segcblist_pend_cbs(rsclp))
 349                return; /* Nothing to do. */
 350        *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
 351        rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
 352        WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
 353        for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
 354                WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
 355}
 356
 357/*
 358 * Insert counts from the specified rcu_cblist structure in the
 359 * specified rcu_segcblist structure.
 360 */
 361void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
 362                                struct rcu_cblist *rclp)
 363{
 364        rsclp->len_lazy += rclp->len_lazy;
 365        rcu_segcblist_add_len(rsclp, rclp->len);
 366        rclp->len_lazy = 0;
 367        rclp->len = 0;
 368}
 369
 370/*
 371 * Move callbacks from the specified rcu_cblist to the beginning of the
 372 * done-callbacks segment of the specified rcu_segcblist.
 373 */
 374void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
 375                                   struct rcu_cblist *rclp)
 376{
 377        int i;
 378
 379        if (!rclp->head)
 380                return; /* No callbacks to move. */
 381        *rclp->tail = rsclp->head;
 382        WRITE_ONCE(rsclp->head, rclp->head);
 383        for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
 384                if (&rsclp->head == rsclp->tails[i])
 385                        WRITE_ONCE(rsclp->tails[i], rclp->tail);
 386                else
 387                        break;
 388        rclp->head = NULL;
 389        rclp->tail = &rclp->head;
 390}
 391
 392/*
 393 * Move callbacks from the specified rcu_cblist to the end of the
 394 * new-callbacks segment of the specified rcu_segcblist.
 395 */
 396void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
 397                                   struct rcu_cblist *rclp)
 398{
 399        if (!rclp->head)
 400                return; /* Nothing to do. */
 401        WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
 402        WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
 403        rclp->head = NULL;
 404        rclp->tail = &rclp->head;
 405}
 406
 407/*
 408 * Advance the callbacks in the specified rcu_segcblist structure based
 409 * on the current value passed in for the grace-period counter.
 410 */
 411void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
 412{
 413        int i, j;
 414
 415        WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
 416        if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
 417                return;
 418
 419        /*
 420         * Find all callbacks whose ->gp_seq numbers indicate that they
 421         * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
 422         */
 423        for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
 424                if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
 425                        break;
 426                WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
 427        }
 428
 429        /* If no callbacks moved, nothing more need be done. */
 430        if (i == RCU_WAIT_TAIL)
 431                return;
 432
 433        /* Clean up tail pointers that might have been misordered above. */
 434        for (j = RCU_WAIT_TAIL; j < i; j++)
 435                WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);
 436
 437        /*
 438         * Callbacks moved, so clean up the misordered ->tails[] pointers
 439         * that now point into the middle of the list of ready-to-invoke
 440         * callbacks.  The overall effect is to copy down the later pointers
 441         * into the gap that was created by the now-ready segments.
 442         */
 443        for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
 444                if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
 445                        break;  /* No more callbacks. */
 446                WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
 447                rsclp->gp_seq[j] = rsclp->gp_seq[i];
 448        }
 449}
 450
 451/*
 452 * "Accelerate" callbacks based on more-accurate grace-period information.
 453 * The reason for this is that RCU does not synchronize the beginnings and
 454 * ends of grace periods, and that callbacks are posted locally.  This in
 455 * turn means that the callbacks must be labelled conservatively early
 456 * on, as getting exact information would degrade both performance and
 457 * scalability.  When more accurate grace-period information becomes
 458 * available, previously posted callbacks can be "accelerated", marking
 459 * them to complete at the end of the earlier grace period.
 460 *
 461 * This function operates on an rcu_segcblist structure, and also the
 462 * grace-period sequence number seq at which new callbacks would become
 463 * ready to invoke.  Returns true if there are callbacks that won't be
 464 * ready to invoke until seq, false otherwise.
 465 */
 466bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
 467{
 468        int i;
 469
 470        WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
 471        if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
 472                return false;
 473
 474        /*
 475         * Find the segment preceding the oldest segment of callbacks
 476         * whose ->gp_seq[] completion is at or after that passed in via
 477         * "seq", skipping any empty segments.  This oldest segment, along
 478         * with any later segments, can be merged in with any newly arrived
 479         * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
 480         * as their ->gp_seq[] grace-period completion sequence number.
 481         */
 482        for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
 483                if (rsclp->tails[i] != rsclp->tails[i - 1] &&
 484                    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
 485                        break;
 486
 487        /*
 488         * If all the segments contain callbacks that correspond to
 489         * earlier grace-period sequence numbers than "seq", leave.
 490         * Assuming that the rcu_segcblist structure has enough
 491         * segments in its arrays, this can only happen if some of
 492         * the non-done segments contain callbacks that really are
 493         * ready to invoke.  This situation will get straightened
 494         * out by the next call to rcu_segcblist_advance().
 495         *
 496         * Also advance to the oldest segment of callbacks whose
 497         * ->gp_seq[] completion is at or after that passed in via "seq",
 498         * skipping any empty segments.
 499         */
 500        if (++i >= RCU_NEXT_TAIL)
 501                return false;
 502
 503        /*
 504         * Merge all later callbacks, including newly arrived callbacks,
 505         * into the segment located by the for-loop above.  Assign "seq"
 506         * as the ->gp_seq[] value in order to correctly handle the case
 507         * where there were no pending callbacks in the rcu_segcblist
 508         * structure other than in the RCU_NEXT_TAIL segment.
 509         */
 510        for (; i < RCU_NEXT_TAIL; i++) {
 511                WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
 512                rsclp->gp_seq[i] = seq;
 513        }
 514        return true;
 515}
 516
 517/*
 518 * Merge the source rcu_segcblist structure into the destination
 519 * rcu_segcblist structure, then initialize the source.  Any pending
 520 * callbacks from the source get to start over.  It is best to
 521 * advance and accelerate both the destination and the source
 522 * before merging.
 523 */
 524void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
 525                         struct rcu_segcblist *src_rsclp)
 526{
 527        struct rcu_cblist donecbs;
 528        struct rcu_cblist pendcbs;
 529
 530        rcu_cblist_init(&donecbs);
 531        rcu_cblist_init(&pendcbs);
 532        rcu_segcblist_extract_count(src_rsclp, &donecbs);
 533        rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
 534        rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
 535        rcu_segcblist_insert_count(dst_rsclp, &donecbs);
 536        rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
 537        rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
 538        rcu_segcblist_init(src_rsclp);
 539}
 540