linux/drivers/lightnvm/pblk-rb.c
   1/*
   2 * Copyright (C) 2016 CNEX Labs
   3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
   4 *
   5 * Based upon the circular ringbuffer.
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version
   9 * 2 as published by the Free Software Foundation.
  10 *
  11 * This program is distributed in the hope that it will be useful, but
  12 * WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * General Public License for more details.
  15 *
  16 * pblk-rb.c - pblk's write buffer
  17 */
  18
  19#include <linux/circ_buf.h>
  20
  21#include "pblk.h"
  22
  23static DECLARE_RWSEM(pblk_rb_lock);
  24
  25void pblk_rb_data_free(struct pblk_rb *rb)
  26{
  27        struct pblk_rb_pages *p, *t;
  28
  29        down_write(&pblk_rb_lock);
  30        list_for_each_entry_safe(p, t, &rb->pages, list) {
  31                free_pages((unsigned long)page_address(p->pages), p->order);
  32                list_del(&p->list);
  33                kfree(p);
  34        }
  35        up_write(&pblk_rb_lock);
  36}
  37
  38/*
  39 * Initialize ring buffer. The data and metadata buffers must be previously
  40 * allocated and their size must be a power of two
  41 * (Documentation/circular-buffers.txt)
  42 */
  43int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
  44                 unsigned int power_size, unsigned int power_seg_sz)
  45{
  46        struct pblk *pblk = container_of(rb, struct pblk, rwb);
  47        unsigned int init_entry = 0;
  48        unsigned int alloc_order = power_size;
  49        unsigned int max_order = MAX_ORDER - 1;
  50        unsigned int order, iter;
  51
  52        down_write(&pblk_rb_lock);
  53        rb->entries = rb_entry_base;
  54        rb->seg_size = (1 << power_seg_sz);
  55        rb->nr_entries = (1 << power_size);
  56        rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
  57        rb->sync_point = EMPTY_ENTRY;
  58
  59        spin_lock_init(&rb->w_lock);
  60        spin_lock_init(&rb->s_lock);
  61
  62        INIT_LIST_HEAD(&rb->pages);
  63
  64        if (alloc_order >= max_order) {
  65                order = max_order;
  66                iter = (1 << (alloc_order - max_order));
  67        } else {
  68                order = alloc_order;
  69                iter = 1;
  70        }
  71
  72        do {
  73                struct pblk_rb_entry *entry;
  74                struct pblk_rb_pages *page_set;
  75                void *kaddr;
  76                unsigned long set_size;
  77                int i;
  78
  79                page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
  80                if (!page_set) {
  81                        up_write(&pblk_rb_lock);
  82                        return -ENOMEM;
  83                }
  84
  85                page_set->order = order;
  86                page_set->pages = alloc_pages(GFP_KERNEL, order);
  87                if (!page_set->pages) {
  88                        kfree(page_set);
  89                        pblk_rb_data_free(rb);
  90                        up_write(&pblk_rb_lock);
  91                        return -ENOMEM;
  92                }
  93                kaddr = page_address(page_set->pages);
  94
  95                entry = &rb->entries[init_entry];
  96                entry->data = kaddr;
  97                entry->cacheline = pblk_cacheline_to_addr(init_entry++);
   98                entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
                     bio_list_init(&entry->w_ctx.bios);
  99
 100                set_size = (1 << order);
 101                for (i = 1; i < set_size; i++) {
 102                        entry = &rb->entries[init_entry];
 103                        entry->cacheline = pblk_cacheline_to_addr(init_entry++);
 104                        entry->data = kaddr + (i * rb->seg_size);
 105                        entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
 106                        bio_list_init(&entry->w_ctx.bios);
 107                }
 108
 109                list_add_tail(&page_set->list, &rb->pages);
 110                iter--;
 111        } while (iter > 0);
 112        up_write(&pblk_rb_lock);
 113
 114#ifdef CONFIG_NVM_DEBUG
 115        atomic_set(&rb->inflight_sync_point, 0);
 116#endif
 117
 118        /*
 119         * Initialize rate-limiter, which controls access to the write buffer
  120         * by user and GC I/O
 121         */
 122        pblk_rl_init(&pblk->rl, rb->nr_entries);
 123
 124        return 0;
 125}
 126
 127/*
 128 * pblk_rb_calculate_size -- calculate the size of the write buffer
 129 */
 130unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
 131{
 132        /* Alloc a write buffer that can at least fit 128 entries */
 133        return (1 << max(get_count_order(nr_entries), 7));
 134}
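
/*
 * Worked example of the sizing rule above: get_count_order() rounds up to
 * the next power-of-two order, so a request for 100 entries yields
 * 1 << 7 = 128 entries (the enforced minimum) and a request for 200
 * entries yields 1 << 8 = 256. The result is therefore always a power of
 * two, as pblk_rb_init() requires.
 */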
 135
 136void *pblk_rb_entries_ref(struct pblk_rb *rb)
 137{
 138        return rb->entries;
 139}
 140
 141static void clean_wctx(struct pblk_w_ctx *w_ctx)
 142{
 143        int flags;
 144
 145try:
 146        flags = READ_ONCE(w_ctx->flags);
 147        if (!(flags & PBLK_SUBMITTED_ENTRY))
 148                goto try;
 149
 150        /* Release flags on context. Protect from writes and reads */
 151        smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
 152        pblk_ppa_set_empty(&w_ctx->ppa);
 153        w_ctx->lba = ADDR_EMPTY;
 154}
 155
 156#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
 157#define pblk_rb_ring_space(rb, head, tail, size) \
 158                                        (CIRC_SPACE(head, tail, size))
 159
 160/*
  161 * Buffer space is calculated with respect to the back (sync) pointer, which
  162 * signals the entries that have already been synchronized to the media.
 163 */
 164static unsigned int pblk_rb_space(struct pblk_rb *rb)
 165{
 166        unsigned int mem = READ_ONCE(rb->mem);
 167        unsigned int sync = READ_ONCE(rb->sync);
 168
 169        return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
 170}
 171
 172/*
  173 * Buffer count is calculated with respect to the submission pointer, which
  174 * signals the entries that are ready to be sent to the media.
 175 */
 176unsigned int pblk_rb_read_count(struct pblk_rb *rb)
 177{
 178        unsigned int mem = READ_ONCE(rb->mem);
 179        unsigned int subm = READ_ONCE(rb->subm);
 180
 181        return pblk_rb_ring_count(mem, subm, rb->nr_entries);
 182}
 183
 184unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
 185{
 186        unsigned int mem = READ_ONCE(rb->mem);
 187        unsigned int sync = READ_ONCE(rb->sync);
 188
 189        return pblk_rb_ring_count(mem, sync, rb->nr_entries);
 190}
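
/*
 * Worked example of the ring accounting above, assuming nr_entries = 8:
 * with mem = 5, subm = 2 and sync = 0,
 *
 *   pblk_rb_read_count() = CIRC_CNT(5, 2, 8)   = 3 entries ready to submit
 *   pblk_rb_sync_count() = CIRC_CNT(5, 0, 8)   = 5 entries not yet synced
 *   pblk_rb_space()      = CIRC_SPACE(5, 0, 8) = 2 writable entries
 *
 * CIRC_SPACE() always keeps one slot free so that a full buffer can be
 * distinguished from an empty one.
 */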
 191
 192unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
 193{
 194        unsigned int subm;
 195
 196        subm = READ_ONCE(rb->subm);
 197        /* Commit read means updating submission pointer */
 198        smp_store_release(&rb->subm,
 199                                (subm + nr_entries) & (rb->nr_entries - 1));
 200
 201        return subm;
 202}
 203
 204static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
 205{
 206        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 207        struct pblk_line *line;
 208        struct pblk_rb_entry *entry;
 209        struct pblk_w_ctx *w_ctx;
 210        unsigned int user_io = 0, gc_io = 0;
 211        unsigned int i;
 212        int flags;
 213
 214        for (i = 0; i < to_update; i++) {
 215                entry = &rb->entries[rb->l2p_update];
 216                w_ctx = &entry->w_ctx;
 217
 218                flags = READ_ONCE(entry->w_ctx.flags);
 219                if (flags & PBLK_IOTYPE_USER)
 220                        user_io++;
 221                else if (flags & PBLK_IOTYPE_GC)
 222                        gc_io++;
 223                else
 224                        WARN(1, "pblk: unknown IO type\n");
 225
 226                pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
 227                                                        entry->cacheline);
 228
 229                line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)];
 230                kref_put(&line->ref, pblk_line_put);
 231                clean_wctx(w_ctx);
 232                rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1);
 233        }
 234
 235        pblk_rl_out(&pblk->rl, user_io, gc_io);
 236
 237        return 0;
 238}
 239
 240/*
 241 * When we move the l2p_update pointer, we update the l2p table - lookups will
 242 * point to the physical address instead of to the cacheline in the write buffer
 243 * from this moment on.
 244 */
 245static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
 246                              unsigned int mem, unsigned int sync)
 247{
 248        unsigned int space, count;
 249        int ret = 0;
 250
 251        lockdep_assert_held(&rb->w_lock);
 252
 253        /* Update l2p only as buffer entries are being overwritten */
 254        space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
 255        if (space > nr_entries)
 256                goto out;
 257
 258        count = nr_entries - space;
 259        /* l2p_update used exclusively under rb->w_lock */
 260        ret = __pblk_rb_update_l2p(rb, count);
 261
 262out:
 263        return ret;
 264}
 265
 266/*
 267 * Update the l2p entry for all sectors stored on the write buffer. This means
 268 * that all future lookups to the l2p table will point to a device address, not
 269 * to the cacheline in the write buffer.
 270 */
 271void pblk_rb_sync_l2p(struct pblk_rb *rb)
 272{
 273        unsigned int sync;
 274        unsigned int to_update;
 275
 276        spin_lock(&rb->w_lock);
 277
 278        /* Protect from reads and writes */
 279        sync = smp_load_acquire(&rb->sync);
 280
 281        to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
 282        __pblk_rb_update_l2p(rb, to_update);
 283
 284        spin_unlock(&rb->w_lock);
 285}
 286
 287/*
 288 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
 289 * Typically, 4KB data chunks coming from a bio will be copied to the ring
  290 * buffer; the write will therefore fail if not all of the incoming data can
  291 * be copied.
 292 */
 293static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
 294                                  struct pblk_w_ctx w_ctx,
 295                                  struct pblk_rb_entry *entry)
 296{
 297        memcpy(entry->data, data, rb->seg_size);
 298
 299        entry->w_ctx.lba = w_ctx.lba;
 300        entry->w_ctx.ppa = w_ctx.ppa;
 301}
 302
 303void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
 304                              struct pblk_w_ctx w_ctx, unsigned int ring_pos)
 305{
 306        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 307        struct pblk_rb_entry *entry;
 308        int flags;
 309
 310        entry = &rb->entries[ring_pos];
 311        flags = READ_ONCE(entry->w_ctx.flags);
 312#ifdef CONFIG_NVM_DEBUG
 313        /* Caller must guarantee that the entry is free */
 314        BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
 315#endif
 316
 317        __pblk_rb_write_entry(rb, data, w_ctx, entry);
 318
 319        pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
 320        flags = w_ctx.flags | PBLK_WRITTEN_DATA;
 321
 322        /* Release flags on write context. Protect from writes */
 323        smp_store_release(&entry->w_ctx.flags, flags);
 324}
 325
 326void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
 327                            struct pblk_w_ctx w_ctx, struct pblk_line *line,
 328                            u64 paddr, unsigned int ring_pos)
 329{
 330        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 331        struct pblk_rb_entry *entry;
 332        int flags;
 333
 334        entry = &rb->entries[ring_pos];
 335        flags = READ_ONCE(entry->w_ctx.flags);
 336#ifdef CONFIG_NVM_DEBUG
 337        /* Caller must guarantee that the entry is free */
 338        BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
 339#endif
 340
 341        __pblk_rb_write_entry(rb, data, w_ctx, entry);
 342
 343        if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
 344                entry->w_ctx.lba = ADDR_EMPTY;
 345
 346        flags = w_ctx.flags | PBLK_WRITTEN_DATA;
 347
 348        /* Release flags on write context. Protect from writes */
 349        smp_store_release(&entry->w_ctx.flags, flags);
 350}
 351
 352static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
 353                                  unsigned int pos)
 354{
 355        struct pblk_rb_entry *entry;
 356        unsigned int subm, sync_point;
 357
 358        subm = READ_ONCE(rb->subm);
 359
 360#ifdef CONFIG_NVM_DEBUG
 361        atomic_inc(&rb->inflight_sync_point);
 362#endif
 363
 364        if (pos == subm)
 365                return 0;
 366
 367        sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
 368        entry = &rb->entries[sync_point];
 369
 370        /* Protect syncs */
 371        smp_store_release(&rb->sync_point, sync_point);
 372
 373        if (!bio)
 374                return 0;
 375
 376        spin_lock_irq(&rb->s_lock);
 377        bio_list_add(&entry->w_ctx.bios, bio);
 378        spin_unlock_irq(&rb->s_lock);
 379
 380        return 1;
 381}
 382
 383static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
 384                               unsigned int *pos)
 385{
 386        unsigned int mem;
 387        unsigned int sync;
 388
 389        sync = READ_ONCE(rb->sync);
 390        mem = READ_ONCE(rb->mem);
 391
 392        if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries)
 393                return 0;
 394
 395        if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
 396                return 0;
 397
 398        *pos = mem;
 399
 400        return 1;
 401}
 402
 403static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
 404                             unsigned int *pos)
 405{
 406        if (!__pblk_rb_may_write(rb, nr_entries, pos))
 407                return 0;
 408
 409        /* Protect from read count */
 410        smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1));
 411        return 1;
 412}
 413
 414void pblk_rb_flush(struct pblk_rb *rb)
 415{
 416        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 417        unsigned int mem = READ_ONCE(rb->mem);
 418
 419        if (pblk_rb_sync_point_set(rb, NULL, mem))
 420                return;
 421
 422        pblk_write_should_kick(pblk);
 423}
 424
 425static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
 426                                   unsigned int *pos, struct bio *bio,
 427                                   int *io_ret)
 428{
 429        unsigned int mem;
 430
 431        if (!__pblk_rb_may_write(rb, nr_entries, pos))
 432                return 0;
 433
 434        mem = (*pos + nr_entries) & (rb->nr_entries - 1);
 435        *io_ret = NVM_IO_DONE;
 436
 437        if (bio->bi_opf & REQ_PREFLUSH) {
 438                struct pblk *pblk = container_of(rb, struct pblk, rwb);
 439
 440#ifdef CONFIG_NVM_DEBUG
 441                atomic_long_inc(&pblk->nr_flush);
 442#endif
 443                if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem))
 444                        *io_ret = NVM_IO_OK;
 445        }
 446
 447        /* Protect from read count */
 448        smp_store_release(&rb->mem, mem);
 449
 450        return 1;
 451}
 452
 453/*
 454 * Atomically check that (i) there is space on the write buffer for the
 455 * incoming I/O, and (ii) the current I/O type has enough budget in the write
 456 * buffer (rate-limiter).
 457 */
 458int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
 459                           unsigned int nr_entries, unsigned int *pos)
 460{
 461        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 462        int io_ret;
 463
 464        spin_lock(&rb->w_lock);
 465        io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
 466        if (io_ret) {
 467                spin_unlock(&rb->w_lock);
 468                return io_ret;
 469        }
 470
 471        if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
 472                spin_unlock(&rb->w_lock);
 473                return NVM_IO_REQUEUE;
 474        }
 475
 476        pblk_rl_user_in(&pblk->rl, nr_entries);
 477        spin_unlock(&rb->w_lock);
 478
 479        return io_ret;
 480}
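
/*
 * Illustrative sketch (not part of the original driver) of how a producer
 * could pair pblk_rb_may_write_user() with pblk_rb_write_entry_user(). The
 * LBA derivation and the per-entry bio advancing are assumptions made for
 * the example; the real user write path lives elsewhere in pblk and may
 * differ in detail.
 */
static int __maybe_unused pblk_rb_example_write_user(struct pblk *pblk,
                                                     struct bio *bio,
                                                     unsigned int nr_entries)
{
        struct pblk_rb *rb = &pblk->rwb;
        struct pblk_w_ctx w_ctx;
        unsigned int bpos, pos;
        unsigned int i;
        int ret;

        /* Atomically reserve nr_entries slots, honouring the rate-limiter */
        ret = pblk_rb_may_write_user(rb, bio, nr_entries, &bpos);
        if (ret != NVM_IO_OK && ret != NVM_IO_DONE)
                return ret;

        for (i = 0; i < nr_entries; i++) {
                /* Hypothetical LBA derivation: one 4KB entry per logical block */
                w_ctx.flags = PBLK_IOTYPE_USER;
                w_ctx.lba = (bio->bi_iter.bi_sector >> 3) + i;
                pblk_ppa_set_empty(&w_ctx.ppa);

                pos = pblk_rb_wrap_pos(rb, bpos + i);
                pblk_rb_write_entry_user(rb, bio_data(bio), w_ctx, pos);
                bio_advance(bio, rb->seg_size);
        }

        /* NVM_IO_DONE: complete the bio now; NVM_IO_OK: it was attached to a
         * sync point and completes once the flush is persisted.
         */
        return ret;
}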
 481
 482/*
  483 * See the pblk_rb_may_write_user() comment.
 484 */
 485int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
 486                         unsigned int *pos)
 487{
 488        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 489
 490        spin_lock(&rb->w_lock);
 491        if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
 492                spin_unlock(&rb->w_lock);
 493                return 0;
 494        }
 495
 496        if (!pblk_rb_may_write(rb, nr_entries, pos)) {
 497                spin_unlock(&rb->w_lock);
 498                return 0;
 499        }
 500
 501        pblk_rl_gc_in(&pblk->rl, nr_entries);
 502        spin_unlock(&rb->w_lock);
 503
 504        return 1;
 505}
 506
 507/*
 508 * The caller of this function must ensure that the backpointer will not
 509 * overwrite the entries passed on the list.
 510 */
 511unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
 512                                      struct list_head *list,
 513                                      unsigned int max)
 514{
 515        struct pblk_rb_entry *entry, *tentry;
 516        struct page *page;
 517        unsigned int read = 0;
 518        int ret;
 519
 520        list_for_each_entry_safe(entry, tentry, list, index) {
 521                if (read > max) {
 522                        pr_err("pblk: too many entries on list\n");
 523                        goto out;
 524                }
 525
 526                page = virt_to_page(entry->data);
 527                if (!page) {
 528                        pr_err("pblk: could not allocate write bio page\n");
 529                        goto out;
 530                }
 531
 532                ret = bio_add_page(bio, page, rb->seg_size, 0);
 533                if (ret != rb->seg_size) {
 534                        pr_err("pblk: could not add page to write bio\n");
 535                        goto out;
 536                }
 537
 538                list_del(&entry->index);
 539                read++;
 540        }
 541
 542out:
 543        return read;
 544}
 545
 546/*
 547 * Read available entries on rb and add them to the given bio. To avoid a memory
  548 * copy, a page reference to the write buffer is added to the bio instead.
 549 *
 550 * This function is used by the write thread to form the write bio that will
 551 * persist data on the write buffer to the media.
 552 */
 553unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
 554                                 unsigned int pos, unsigned int nr_entries,
 555                                 unsigned int count)
 556{
 557        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 558        struct request_queue *q = pblk->dev->q;
 559        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 560        struct bio *bio = rqd->bio;
 561        struct pblk_rb_entry *entry;
 562        struct page *page;
 563        unsigned int pad = 0, to_read = nr_entries;
 564        unsigned int i;
 565        int flags;
 566
 567        if (count < nr_entries) {
 568                pad = nr_entries - count;
 569                to_read = count;
 570        }
 571
 572        c_ctx->sentry = pos;
 573        c_ctx->nr_valid = to_read;
 574        c_ctx->nr_padded = pad;
 575
 576        for (i = 0; i < to_read; i++) {
 577                entry = &rb->entries[pos];
 578
 579                /* A write has been allowed into the buffer, but data is still
 580                 * being copied to it. It is ok to busy wait.
 581                 */
 582try:
 583                flags = READ_ONCE(entry->w_ctx.flags);
 584                if (!(flags & PBLK_WRITTEN_DATA)) {
 585                        io_schedule();
 586                        goto try;
 587                }
 588
 589                page = virt_to_page(entry->data);
 590                if (!page) {
 591                        pr_err("pblk: could not allocate write bio page\n");
 592                        flags &= ~PBLK_WRITTEN_DATA;
 593                        flags |= PBLK_SUBMITTED_ENTRY;
 594                        /* Release flags on context. Protect from writes */
 595                        smp_store_release(&entry->w_ctx.flags, flags);
 596                        return NVM_IO_ERR;
 597                }
 598
 599                if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
 600                                                                rb->seg_size) {
 601                        pr_err("pblk: could not add page to write bio\n");
 602                        flags &= ~PBLK_WRITTEN_DATA;
 603                        flags |= PBLK_SUBMITTED_ENTRY;
 604                        /* Release flags on context. Protect from writes */
 605                        smp_store_release(&entry->w_ctx.flags, flags);
 606                        return NVM_IO_ERR;
 607                }
 608
 609                if (flags & PBLK_FLUSH_ENTRY) {
 610                        unsigned int sync_point;
 611
 612                        sync_point = READ_ONCE(rb->sync_point);
 613                        if (sync_point == pos) {
 614                                /* Protect syncs */
 615                                smp_store_release(&rb->sync_point, EMPTY_ENTRY);
 616                        }
 617
 618                        flags &= ~PBLK_FLUSH_ENTRY;
 619#ifdef CONFIG_NVM_DEBUG
 620                        atomic_dec(&rb->inflight_sync_point);
 621#endif
 622                }
 623
 624                flags &= ~PBLK_WRITTEN_DATA;
 625                flags |= PBLK_SUBMITTED_ENTRY;
 626
 627                /* Release flags on context. Protect from writes */
 628                smp_store_release(&entry->w_ctx.flags, flags);
 629
 630                pos = (pos + 1) & (rb->nr_entries - 1);
 631        }
 632
 633        if (pad) {
 634                if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
 635                        pr_err("pblk: could not pad page in write bio\n");
 636                        return NVM_IO_ERR;
 637                }
 638        }
 639
 640#ifdef CONFIG_NVM_DEBUG
 641        atomic_long_add(pad, &((struct pblk *)
 642                        (container_of(rb, struct pblk, rwb)))->padded_writes);
 643#endif
 644
 645        return NVM_IO_OK;
 646}
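
/*
 * Illustrative sketch (not part of the original driver) of how the write
 * thread could combine the helpers above when forming a write request. The
 * rqd/bio setup is omitted, and pblk->max_write_pgs is assumed to hold the
 * maximum number of sectors per write; the real logic also accounts for
 * flush (sync) points and padding.
 */
static int __maybe_unused pblk_rb_example_form_write(struct pblk *pblk,
                                                     struct nvm_rq *rqd,
                                                     struct bio *bio)
{
        struct pblk_rb *rb = &pblk->rwb;
        unsigned int secs_avail, secs_to_sync, pos;
        int ret;

        /* Entries between the subm and mem pointers are ready for the media */
        secs_avail = pblk_rb_read_count(rb);
        if (!secs_avail)
                return 0;

        secs_to_sync = min_t(unsigned int, secs_avail, pblk->max_write_pgs);

        rqd->bio = bio;

        /* Advance the submission pointer and remember where we started */
        pos = pblk_rb_read_commit(rb, secs_to_sync);

        /* Map the committed entries onto the bio; no padding is needed here
         * since secs_to_sync <= secs_avail.
         */
        ret = pblk_rb_read_to_bio(rb, rqd, pos, secs_to_sync, secs_avail);
        if (ret != NVM_IO_OK)
                return -EIO;

        return secs_to_sync;
}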
 647
 648/*
 649 * Copy to bio only if the lba matches the one on the given cache entry.
 650 * Otherwise, it means that the entry has been overwritten, and the bio should
 651 * be directed to disk.
 652 */
 653int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
 654                        struct ppa_addr ppa, int bio_iter, bool advanced_bio)
 655{
 656        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 657        struct pblk_rb_entry *entry;
 658        struct pblk_w_ctx *w_ctx;
 659        struct ppa_addr l2p_ppa;
 660        u64 pos = pblk_addr_to_cacheline(ppa);
 661        void *data;
 662        int flags;
 663        int ret = 1;
  664
 666#ifdef CONFIG_NVM_DEBUG
 667        /* Caller must ensure that the access will not cause an overflow */
 668        BUG_ON(pos >= rb->nr_entries);
 669#endif
 670        entry = &rb->entries[pos];
 671        w_ctx = &entry->w_ctx;
 672        flags = READ_ONCE(w_ctx->flags);
 673
 674        spin_lock(&rb->w_lock);
 675        spin_lock(&pblk->trans_lock);
 676        l2p_ppa = pblk_trans_map_get(pblk, lba);
 677        spin_unlock(&pblk->trans_lock);
 678
 679        /* Check if the entry has been overwritten or is scheduled to be */
 680        if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
 681                                                flags & PBLK_WRITABLE_ENTRY) {
 682                ret = 0;
 683                goto out;
 684        }
 685
 686        /* Only advance the bio if it hasn't been advanced already. If advanced,
 687         * this bio is at least a partial bio (i.e., it has partially been
 688         * filled with data from the cache). If part of the data resides on the
  689         * media, it will be read later on.
 690         */
 691        if (unlikely(!advanced_bio))
 692                bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
 693
 694        data = bio_data(bio);
 695        memcpy(data, entry->data, rb->seg_size);
 696
 697out:
 698        spin_unlock(&rb->w_lock);
 699        return ret;
 700}
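
/*
 * Illustrative sketch (not part of the original driver) of how a read path
 * could use pblk_rb_copy_to_bio() after an L2P lookup. pblk_addr_in_cache()
 * is assumed to be available from pblk.h; the real read path also handles
 * partially cached bios.
 */
static int __maybe_unused pblk_rb_example_read_hit(struct pblk *pblk,
                                                   struct bio *bio,
                                                   sector_t lba,
                                                   struct ppa_addr ppa,
                                                   int bio_iter)
{
        /* Only cache addresses index the write buffer */
        if (!pblk_addr_in_cache(ppa))
                return 0;

        /* Returns 1 on a hit; 0 means the entry was overwritten in the
         * meantime and the sector must be read from the media instead.
         */
        return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter, false);
}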
 701
 702struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
 703{
 704        unsigned int entry = pos & (rb->nr_entries - 1);
 705
 706        return &rb->entries[entry].w_ctx;
 707}
 708
 709unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
 710        __acquires(&rb->s_lock)
 711{
 712        if (flags)
 713                spin_lock_irqsave(&rb->s_lock, *flags);
 714        else
 715                spin_lock_irq(&rb->s_lock);
 716
 717        return rb->sync;
 718}
 719
 720void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
 721        __releases(&rb->s_lock)
 722{
 723        lockdep_assert_held(&rb->s_lock);
 724
 725        if (flags)
 726                spin_unlock_irqrestore(&rb->s_lock, *flags);
 727        else
 728                spin_unlock_irq(&rb->s_lock);
 729}
 730
 731unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
 732{
 733        unsigned int sync;
 734        unsigned int i;
 735
 736        lockdep_assert_held(&rb->s_lock);
 737
 738        sync = READ_ONCE(rb->sync);
 739
 740        for (i = 0; i < nr_entries; i++)
 741                sync = (sync + 1) & (rb->nr_entries - 1);
 742
 743        /* Protect from counts */
 744        smp_store_release(&rb->sync, sync);
 745
 746        return sync;
 747}
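
/*
 * Illustrative sketch (not part of the original driver) of a completion
 * path built on the sync helpers above: take the sync lock, complete any
 * flush bios attached to the just-persisted entries, then advance the sync
 * pointer. The real completion logic lives in pblk's write path.
 */
static void __maybe_unused pblk_rb_example_complete(struct pblk *pblk,
                                                    unsigned int nr_synced)
{
        struct pblk_rb *rb = &pblk->rwb;
        struct pblk_rb_entry *entry;
        struct bio *bio;
        unsigned long flags;
        unsigned int sync, i;

        sync = pblk_rb_sync_init(rb, &flags);

        for (i = 0; i < nr_synced; i++) {
                entry = &rb->entries[pblk_rb_wrap_pos(rb, sync + i)];
                while ((bio = bio_list_pop(&entry->w_ctx.bios)))
                        bio_endio(bio);
        }

        pblk_rb_sync_advance(rb, nr_synced);
        pblk_rb_sync_end(rb, &flags);
}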
 748
 749unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb)
 750{
 751        unsigned int subm, sync_point;
 752        unsigned int count;
 753
 754        /* Protect syncs */
 755        sync_point = smp_load_acquire(&rb->sync_point);
 756        if (sync_point == EMPTY_ENTRY)
 757                return 0;
 758
 759        subm = READ_ONCE(rb->subm);
 760
 761        /* The sync point itself counts as a sector to sync */
 762        count = pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1;
 763
 764        return count;
 765}
 766
 767/*
 768 * Scan from the current position of the sync pointer to find the entry that
 769 * corresponds to the given ppa. This is necessary since write requests can be
 770 * completed out of order. The assumption is that the ppa is close to the sync
 771 * pointer thus the search will not take long.
 772 *
  773 * The caller of this function must guarantee that the sync pointer will not
 774 * reach the entry while it is using the metadata associated with it. With this
 775 * assumption in mind, there is no need to take the sync lock.
 776 */
 777struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
 778                                              struct ppa_addr *ppa)
 779{
 780        unsigned int sync, subm, count;
 781        unsigned int i;
 782
 783        sync = READ_ONCE(rb->sync);
 784        subm = READ_ONCE(rb->subm);
 785        count = pblk_rb_ring_count(subm, sync, rb->nr_entries);
 786
 787        for (i = 0; i < count; i++)
 788                sync = (sync + 1) & (rb->nr_entries - 1);
 789
 790        return NULL;
 791}
 792
 793int pblk_rb_tear_down_check(struct pblk_rb *rb)
 794{
 795        struct pblk_rb_entry *entry;
 796        int i;
 797        int ret = 0;
 798
 799        spin_lock(&rb->w_lock);
 800        spin_lock_irq(&rb->s_lock);
 801
 802        if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
 803                                (rb->sync == rb->l2p_update) &&
 804                                (rb->sync_point == EMPTY_ENTRY)) {
 805                goto out;
 806        }
 807
 808        if (!rb->entries) {
 809                ret = 1;
 810                goto out;
 811        }
 812
 813        for (i = 0; i < rb->nr_entries; i++) {
 814                entry = &rb->entries[i];
 815
 816                if (!entry->data) {
 817                        ret = 1;
 818                        goto out;
 819                }
 820        }
 821
 822out:
 823        spin_unlock(&rb->w_lock);
 824        spin_unlock_irq(&rb->s_lock);
 825
 826        return ret;
 827}
 828
 829unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
 830{
 831        return (pos & (rb->nr_entries - 1));
 832}
 833
 834int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
 835{
 836        return (pos >= rb->nr_entries);
 837}
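
/*
 * For example, with nr_entries = 64, pblk_rb_wrap_pos(rb, 70) returns 6,
 * whereas pblk_rb_pos_oob(rb, 70) is true: the former wraps a position
 * into the ring, the latter only reports whether it is out of range.
 */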
 838
 839ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
 840{
 841        struct pblk *pblk = container_of(rb, struct pblk, rwb);
 842        struct pblk_c_ctx *c;
 843        ssize_t offset;
 844        int queued_entries = 0;
 845
 846        spin_lock_irq(&rb->s_lock);
 847        list_for_each_entry(c, &pblk->compl_list, list)
 848                queued_entries++;
 849        spin_unlock_irq(&rb->s_lock);
 850
 851        if (rb->sync_point != EMPTY_ENTRY)
 852                offset = scnprintf(buf, PAGE_SIZE,
 853                        "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
 854                        rb->nr_entries,
 855                        rb->mem,
 856                        rb->subm,
 857                        rb->sync,
 858                        rb->l2p_update,
 859#ifdef CONFIG_NVM_DEBUG
 860                        atomic_read(&rb->inflight_sync_point),
 861#else
 862                        0,
 863#endif
 864                        rb->sync_point,
 865                        pblk_rb_read_count(rb),
 866                        pblk_rb_space(rb),
 867                        pblk_rb_sync_point_count(rb),
 868                        queued_entries);
 869        else
 870                offset = scnprintf(buf, PAGE_SIZE,
 871                        "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
 872                        rb->nr_entries,
 873                        rb->mem,
 874                        rb->subm,
 875                        rb->sync,
 876                        rb->l2p_update,
 877#ifdef CONFIG_NVM_DEBUG
 878                        atomic_read(&rb->inflight_sync_point),
 879#else
 880                        0,
 881#endif
 882                        pblk_rb_read_count(rb),
 883                        pblk_rb_space(rb),
 884                        pblk_rb_sync_point_count(rb),
 885                        queued_entries);
 886
 887        return offset;
 888}
 889