linux/lib/sbitmap.c
/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <linux/sched.h>
#include <linux/random.h>
#include <linux/sbitmap.h>
#include <linux/seq_file.h>

int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
                      gfp_t flags, int node)
{
        unsigned int bits_per_word;
        unsigned int i;

        if (shift < 0) {
                shift = ilog2(BITS_PER_LONG);
                /*
                 * If the bitmap is small, shrink the number of bits per word so
                 * we spread over a few cachelines, at least. If less than 4
                 * bits, just forget about it, it's not going to work optimally
                 * anyway.
                 */
                if (depth >= 4) {
                        while ((4U << shift) > depth)
                                shift--;
                }
        }
        bits_per_word = 1U << shift;
        if (bits_per_word > BITS_PER_LONG)
                return -EINVAL;

        sb->shift = shift;
        sb->depth = depth;
        sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

        if (depth == 0) {
                sb->map = NULL;
                return 0;
        }

        sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
        if (!sb->map)
                return -ENOMEM;

        for (i = 0; i < sb->map_nr; i++) {
                sb->map[i].depth = min(depth, bits_per_word);
                depth -= sb->map[i].depth;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);
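
/*
 * Editor's usage sketch (not part of the original file): allocate a
 * 128-bit map, letting a negative @shift pick the word size
 * automatically, then tear it down with sbitmap_free() from
 * <linux/sbitmap.h>:
 *
 *	struct sbitmap sb;
 *
 *	if (sbitmap_init_node(&sb, 128, -1, GFP_KERNEL, NUMA_NO_NODE))
 *		return -ENOMEM;
 *	...
 *	sbitmap_free(&sb);
 */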

void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
{
        unsigned int bits_per_word = 1U << sb->shift;
        unsigned int i;

        sb->depth = depth;
        sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

        for (i = 0; i < sb->map_nr; i++) {
                sb->map[i].depth = min(depth, bits_per_word);
                depth -= sb->map[i].depth;
        }
}
EXPORT_SYMBOL_GPL(sbitmap_resize);
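
/*
 * Worked example (editor's illustration): with 64-bit words (shift = 6),
 * resizing to depth = 100 gives map_nr = DIV_ROUND_UP(100, 64) = 2, with
 * map[0].depth = 64 and map[1].depth = 36. Nothing is reallocated, so
 * the new depth must not exceed the depth the sbitmap was initialized
 * with.
 */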

/*
 * Find and claim a free bit in a single word, starting the search at
 * @hint. If @wrap is true and nothing is found from @hint onwards, the
 * search restarts once from bit 0.
 */
static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
                              unsigned int hint, bool wrap)
{
        unsigned int orig_hint = hint;
        int nr;

        while (1) {
                nr = find_next_zero_bit(word, depth, hint);
                if (unlikely(nr >= depth)) {
                        /*
                         * We started with an offset, and we didn't reset the
                         * offset to 0 in a failure case, so start from 0 to
                         * exhaust the map.
                         */
                        if (orig_hint && hint && wrap) {
                                hint = orig_hint = 0;
                                continue;
                        }
                        return -1;
                }

                if (!test_and_set_bit_lock(nr, word))
                        break;

                hint = nr + 1;
                if (hint >= depth - 1)
                        hint = 0;
        }

        return nr;
}
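
/*
 * Illustrative trace (editor's addition): with word = 0b11101111,
 * depth = 8, hint = 5, and wrap = true, the first find_next_zero_bit()
 * scan finds no zero bit in bits 5..7, so hint and orig_hint are reset
 * to 0; the retry finds bit 4 clear, claims it with
 * test_and_set_bit_lock(), and returns 4.
 */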

int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
{
        unsigned int i, index;
        int nr = -1;

        index = SB_NR_TO_INDEX(sb, alloc_hint);

        for (i = 0; i < sb->map_nr; i++) {
                nr = __sbitmap_get_word(&sb->map[index].word,
                                        sb->map[index].depth,
                                        SB_NR_TO_BIT(sb, alloc_hint),
                                        !round_robin);
                if (nr != -1) {
                        nr += index << sb->shift;
                        break;
                }

                /* Jump to next index. */
                index++;
                alloc_hint = index << sb->shift;

                if (index >= sb->map_nr) {
                        index = 0;
                        alloc_hint = 0;
                }
        }

        return nr;
}
EXPORT_SYMBOL_GPL(sbitmap_get);
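
/*
 * Round-trip sketch (editor's illustration): grab any free bit and hand
 * it back. sbitmap_clear_bit() is the inline helper from
 * <linux/sbitmap.h>; a negative return here means every bit is in use.
 *
 *	int nr = sbitmap_get(&sb, 0, false);
 *
 *	if (nr < 0)
 *		return -EBUSY;
 *	...
 *	sbitmap_clear_bit(&sb, nr);
 */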

int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
                        unsigned long shallow_depth)
{
        unsigned int i, index;
        int nr = -1;

        index = SB_NR_TO_INDEX(sb, alloc_hint);

        for (i = 0; i < sb->map_nr; i++) {
                nr = __sbitmap_get_word(&sb->map[index].word,
                                        min(sb->map[index].depth, shallow_depth),
                                        SB_NR_TO_BIT(sb, alloc_hint), true);
                if (nr != -1) {
                        nr += index << sb->shift;
                        break;
                }

                /* Jump to next index. */
                index++;
                alloc_hint = index << sb->shift;

                if (index >= sb->map_nr) {
                        index = 0;
                        alloc_hint = 0;
                }
        }

        return nr;
}
EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
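
/*
 * Sketch (editor's illustration): with 64-bit words, a shallow_depth of
 * 8 restricts the search to bits 0..7 of each word, so this path can
 * claim at most eight bits per word even while the full depth remains
 * available to plain sbitmap_get():
 *
 *	int nr = sbitmap_get_shallow(&sb, 0, 8);
 */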

bool sbitmap_any_bit_set(const struct sbitmap *sb)
{
        unsigned int i;

        for (i = 0; i < sb->map_nr; i++) {
                if (sb->map[i].word)
                        return true;
        }
        return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);

bool sbitmap_any_bit_clear(const struct sbitmap *sb)
{
        unsigned int i;

        for (i = 0; i < sb->map_nr; i++) {
                const struct sbitmap_word *word = &sb->map[i];
                unsigned long ret;

                ret = find_first_zero_bit(&word->word, word->depth);
                if (ret < word->depth)
                        return true;
        }
        return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);

unsigned int sbitmap_weight(const struct sbitmap *sb)
{
        unsigned int i, weight = 0;

        for (i = 0; i < sb->map_nr; i++) {
                const struct sbitmap_word *word = &sb->map[i];

                weight += bitmap_weight(&word->word, word->depth);
        }
        return weight;
}
EXPORT_SYMBOL_GPL(sbitmap_weight);

void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
{
        seq_printf(m, "depth=%u\n", sb->depth);
        seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
        seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
        seq_printf(m, "map_nr=%u\n", sb->map_nr);
}
EXPORT_SYMBOL_GPL(sbitmap_show);

/*
 * Emit one byte of the bitmap as two hex digits, printing a "%08x:"
 * offset header every 16 bytes and a space before every even-offset
 * byte, so the bytes appear in groups of two.
 */
static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
{
        if ((offset & 0xf) == 0) {
                if (offset != 0)
                        seq_putc(m, '\n');
                seq_printf(m, "%08x:", offset);
        }
        if ((offset & 0x1) == 0)
                seq_putc(m, ' ');
        seq_printf(m, "%02x", byte);
}

void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
{
        u8 byte = 0;
        unsigned int byte_bits = 0;
        unsigned int offset = 0;
        int i;

        for (i = 0; i < sb->map_nr; i++) {
                unsigned long word = READ_ONCE(sb->map[i].word);
                unsigned int word_bits = READ_ONCE(sb->map[i].depth);

                while (word_bits > 0) {
                        unsigned int bits = min(8 - byte_bits, word_bits);

                        byte |= (word & (BIT(bits) - 1)) << byte_bits;
                        byte_bits += bits;
                        if (byte_bits == 8) {
                                emit_byte(m, offset, byte);
                                byte = 0;
                                byte_bits = 0;
                                offset++;
                        }
                        word >>= bits;
                        word_bits -= bits;
                }
        }
        if (byte_bits) {
                emit_byte(m, offset, byte);
                offset++;
        }
        if (offset)
                seq_putc(m, '\n');
}
EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
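
/*
 * Sample output (editor's illustration of the format only; the values
 * are made up). For a 40-bit map with every bit set, the dump is a
 * single offset-prefixed line of hex bytes, grouped in pairs, sixteen
 * bytes per line:
 *
 *	00000000: ffff ffff ff
 */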

static unsigned int sbq_calc_wake_batch(unsigned int depth)
{
        unsigned int wake_batch;

        /*
         * For each batch, we wake up one queue. We need to make sure that our
         * batch size is small enough that the full depth of the bitmap is
         * enough to wake up all of the queues.
         */
        wake_batch = SBQ_WAKE_BATCH;
        if (wake_batch > depth / SBQ_WAIT_QUEUES)
                wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);

        return wake_batch;
}
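
/*
 * Worked numbers (editor's illustration), assuming the header's
 * SBQ_WAKE_BATCH = 8 and SBQ_WAIT_QUEUES = 8: depth = 256 keeps the
 * full batch of 8 (256 / 8 = 32 >= 8); depth = 16 shrinks it to
 * 16 / 8 = 2; depth = 4 bottoms out at max(1U, 4 / 8) = 1.
 */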

int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
                            int shift, bool round_robin, gfp_t flags, int node)
{
        int ret;
        int i;

        ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
        if (ret)
                return ret;

        sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
        if (!sbq->alloc_hint) {
                sbitmap_free(&sbq->sb);
                return -ENOMEM;
        }

        if (depth && !round_robin) {
                for_each_possible_cpu(i)
                        *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
        }

        sbq->wake_batch = sbq_calc_wake_batch(depth);
        atomic_set(&sbq->wake_index, 0);

        sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
        if (!sbq->ws) {
                free_percpu(sbq->alloc_hint);
                sbitmap_free(&sbq->sb);
                return -ENOMEM;
        }

        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                init_waitqueue_head(&sbq->ws[i].wait);
                atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
        }

        sbq->round_robin = round_robin;
        return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
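
/*
 * Usage sketch (editor's illustration, assuming GFP_KERNEL and no NUMA
 * preference): set up a 64-tag queue, take and release one tag via the
 * sbitmap_queue_get() and sbitmap_queue_free() helpers from
 * <linux/sbitmap.h>:
 *
 *	struct sbitmap_queue sbq;
 *	unsigned int cpu;
 *	int tag;
 *
 *	if (sbitmap_queue_init_node(&sbq, 64, -1, false, GFP_KERNEL,
 *				    NUMA_NO_NODE))
 *		return -ENOMEM;
 *	tag = sbitmap_queue_get(&sbq, &cpu);
 *	if (tag >= 0)
 *		sbitmap_queue_clear(&sbq, tag, cpu);
 *	sbitmap_queue_free(&sbq);
 */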

void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
        unsigned int wake_batch = sbq_calc_wake_batch(depth);
        int i;

        if (sbq->wake_batch != wake_batch) {
                WRITE_ONCE(sbq->wake_batch, wake_batch);
                /*
                 * Pairs with the memory barrier in sbq_wake_up() to ensure that
                 * the batch size is updated before the wait counts.
                 */
                smp_mb__before_atomic();
                for (i = 0; i < SBQ_WAIT_QUEUES; i++)
                        atomic_set(&sbq->ws[i].wait_cnt, 1);
        }
        sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);

int __sbitmap_queue_get(struct sbitmap_queue *sbq)
{
        unsigned int hint, depth;
        int nr;

        hint = this_cpu_read(*sbq->alloc_hint);
        depth = READ_ONCE(sbq->sb.depth);
        if (unlikely(hint >= depth)) {
                hint = depth ? prandom_u32() % depth : 0;
                this_cpu_write(*sbq->alloc_hint, hint);
        }
        nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);

        if (nr == -1) {
                /* If the map is full, a hint won't do us much good. */
                this_cpu_write(*sbq->alloc_hint, 0);
        } else if (nr == hint || unlikely(sbq->round_robin)) {
                /* Only update the hint if we used it. */
                hint = nr + 1;
                if (hint >= depth - 1)
                        hint = 0;
                this_cpu_write(*sbq->alloc_hint, hint);
        }

        return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get);

int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
                                unsigned int shallow_depth)
{
        unsigned int hint, depth;
        int nr;

        hint = this_cpu_read(*sbq->alloc_hint);
        depth = READ_ONCE(sbq->sb.depth);
        if (unlikely(hint >= depth)) {
                hint = depth ? prandom_u32() % depth : 0;
                this_cpu_write(*sbq->alloc_hint, hint);
        }
        nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);

        if (nr == -1) {
                /* If the map is full, a hint won't do us much good. */
                this_cpu_write(*sbq->alloc_hint, 0);
        } else if (nr == hint || unlikely(sbq->round_robin)) {
                /* Only update the hint if we used it. */
                hint = nr + 1;
                if (hint >= depth - 1)
                        hint = 0;
                this_cpu_write(*sbq->alloc_hint, hint);
        }

        return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);

/*
 * Pick the wait queue to wake next: the first active queue at or after
 * wake_index, nudging wake_index forward if the search moved past it.
 */
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
        int i, wake_index;

        wake_index = atomic_read(&sbq->wake_index);
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                struct sbq_wait_state *ws = &sbq->ws[wake_index];

                if (waitqueue_active(&ws->wait)) {
                        int o = atomic_read(&sbq->wake_index);

                        if (wake_index != o)
                                atomic_cmpxchg(&sbq->wake_index, o, wake_index);
                        return ws;
                }

                wake_index = sbq_index_inc(wake_index);
        }

        return NULL;
}

static void sbq_wake_up(struct sbitmap_queue *sbq)
{
        struct sbq_wait_state *ws;
        unsigned int wake_batch;
        int wait_cnt;

        /*
         * Pairs with the memory barrier in set_current_state() to ensure the
         * proper ordering of clear_bit()/waitqueue_active() in the waker and
         * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
         * waiter. See the comment on waitqueue_active(). This is __after_atomic
         * because we just did clear_bit_unlock() in the caller.
         */
        smp_mb__after_atomic();

        ws = sbq_wake_ptr(sbq);
        if (!ws)
                return;

        wait_cnt = atomic_dec_return(&ws->wait_cnt);
        if (wait_cnt <= 0) {
                wake_batch = READ_ONCE(sbq->wake_batch);
                /*
                 * Pairs with the memory barrier in sbitmap_queue_resize() to
                 * ensure that we see the batch size update before the wait
                 * count is reset.
                 */
                smp_mb__before_atomic();
                /*
                 * If there are concurrent callers to sbq_wake_up(), the last
                 * one to decrement the wait count below zero will bump it back
                 * up. If there is a concurrent resize, the count reset will
                 * either cause the cmpxchg to fail or overwrite after the
                 * cmpxchg.
                 */
                atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
                sbq_index_atomic_inc(&sbq->wake_index);
                wake_up_nr(&ws->wait, wake_batch);
        }
}

void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
                         unsigned int cpu)
{
        sbitmap_clear_bit_unlock(&sbq->sb, nr);
        sbq_wake_up(sbq);
        if (likely(!sbq->round_robin && nr < sbq->sb.depth))
                *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
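
/*
 * Waiter-side sketch (editor's illustration, loosely modeled on how
 * blk-mq tag allocation pairs with sbitmap_queue_clear(); wait_index is
 * an atomic_t maintained by the caller, and the real loop retries after
 * waking): a caller that fails to get a tag sleeps on one of the wait
 * queues and is woken by sbq_wake_up() once a batch of bits is freed:
 *
 *	struct sbq_wait_state *ws = sbq_wait_ptr(&sbq, &wait_index);
 *	DEFINE_WAIT(wait);
 *
 *	prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
 *	tag = __sbitmap_queue_get(&sbq);
 *	if (tag < 0)
 *		io_schedule();
 *	finish_wait(&ws->wait, &wait);
 */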

void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
{
        int i, wake_index;

        /*
         * Pairs with the memory barrier in set_current_state() like in
         * sbq_wake_up().
         */
        smp_mb();
        wake_index = atomic_read(&sbq->wake_index);
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                struct sbq_wait_state *ws = &sbq->ws[wake_index];

                if (waitqueue_active(&ws->wait))
                        wake_up(&ws->wait);

                wake_index = sbq_index_inc(wake_index);
        }
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);

void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
{
        bool first;
        int i;

        sbitmap_show(&sbq->sb, m);

        seq_puts(m, "alloc_hint={");
        first = true;
        for_each_possible_cpu(i) {
                if (!first)
                        seq_puts(m, ", ");
                first = false;
                seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
        }
        seq_puts(m, "}\n");

        seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
        seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));

        seq_puts(m, "ws={\n");
        for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                struct sbq_wait_state *ws = &sbq->ws[i];

                seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
                           atomic_read(&ws->wait_cnt),
                           waitqueue_active(&ws->wait) ? "active" : "inactive");
        }
        seq_puts(m, "}\n");

        seq_printf(m, "round_robin=%d\n", sbq->round_robin);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);
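
/*
 * Sample debugfs output (editor's illustration of the format; the
 * numbers are invented, and only four CPUs' hints are shown):
 *
 *	depth=64
 *	busy=3
 *	bits_per_word=64
 *	map_nr=1
 *	alloc_hint={12, 41, 7, 30}
 *	wake_batch=8
 *	wake_index=0
 *	ws={
 *		{.wait_cnt=8, .wait=inactive},
 *		...
 *	}
 *	round_robin=0
 */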