linux/drivers/lightnvm/pblk-gc.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2016 CNEX Labs
   3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
   4 *                  Matias Bjorling <matias@cnexlabs.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License version
   8 * 2 as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License for more details.
  14 *
  15 * pblk-gc.c - pblk's garbage collector
  16 */
  17
  18#include "pblk.h"
  19#include <linux/delay.h>
  20
  21static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
  22{
  23        vfree(gc_rq->data);
  24        kfree(gc_rq);
  25}
  26
  27static int pblk_gc_write(struct pblk *pblk)
  28{
  29        struct pblk_gc *gc = &pblk->gc;
  30        struct pblk_gc_rq *gc_rq, *tgc_rq;
  31        LIST_HEAD(w_list);
  32
  33        spin_lock(&gc->w_lock);
  34        if (list_empty(&gc->w_list)) {
  35                spin_unlock(&gc->w_lock);
  36                return 1;
  37        }
  38
  39        list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
  40        gc->w_entries = 0;
  41        spin_unlock(&gc->w_lock);
  42
  43        list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
  44                pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
  45                                gc_rq->nr_secs, gc_rq->secs_to_gc,
  46                                gc_rq->line, PBLK_IOTYPE_GC);
  47
  48                list_del(&gc_rq->list);
  49                kref_put(&gc_rq->line->ref, pblk_line_put);
  50                pblk_gc_free_gc_rq(gc_rq);
  51        }
  52
  53        return 0;
  54}
  55
  56static void pblk_gc_writer_kick(struct pblk_gc *gc)
  57{
  58        wake_up_process(gc->gc_writer_ts);
  59}
  60
  61/*
  62 * Responsible for managing all memory related to a gc request. Also in case of
  63 * failure
  64 */
  65static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
  66{
  67        struct nvm_tgt_dev *dev = pblk->dev;
  68        struct nvm_geo *geo = &dev->geo;
  69        struct pblk_gc *gc = &pblk->gc;
  70        struct pblk_line *line = gc_rq->line;
  71        void *data;
  72        unsigned int secs_to_gc;
  73        int ret = 0;
  74
  75        data = vmalloc(gc_rq->nr_secs * geo->sec_size);
  76        if (!data) {
  77                ret = -ENOMEM;
  78                goto out;
  79        }
  80
  81        /* Read from GC victim block */
  82        if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
  83                                                        &secs_to_gc, line)) {
  84                ret = -EFAULT;
  85                goto free_data;
  86        }
  87
  88        if (!secs_to_gc)
  89                goto free_rq;
  90
  91        gc_rq->data = data;
  92        gc_rq->secs_to_gc = secs_to_gc;
  93
  94retry:
  95        spin_lock(&gc->w_lock);
  96        if (gc->w_entries >= PBLK_GC_W_QD) {
  97                spin_unlock(&gc->w_lock);
  98                pblk_gc_writer_kick(&pblk->gc);
  99                usleep_range(128, 256);
 100                goto retry;
 101        }
 102        gc->w_entries++;
 103        list_add_tail(&gc_rq->list, &gc->w_list);
 104        spin_unlock(&gc->w_lock);
 105
 106        pblk_gc_writer_kick(&pblk->gc);
 107
 108        return 0;
 109
 110free_rq:
 111        kfree(gc_rq);
 112free_data:
 113        vfree(data);
 114out:
 115        kref_put(&line->ref, pblk_line_put);
 116        return ret;
 117}
 118
 119static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 120{
 121        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 122        struct list_head *move_list;
 123
 124        spin_lock(&line->lock);
 125        WARN_ON(line->state != PBLK_LINESTATE_GC);
 126        line->state = PBLK_LINESTATE_CLOSED;
 127        move_list = pblk_line_gc_list(pblk, line);
 128        spin_unlock(&line->lock);
 129
 130        if (move_list) {
 131                spin_lock(&l_mg->gc_lock);
 132                list_add_tail(&line->list, move_list);
 133                spin_unlock(&l_mg->gc_lock);
 134        }
 135}
 136
 137static void pblk_gc_line_ws(struct work_struct *work)
 138{
 139        struct pblk_line_ws *line_rq_ws = container_of(work,
 140                                                struct pblk_line_ws, ws);
 141        struct pblk *pblk = line_rq_ws->pblk;
 142        struct pblk_gc *gc = &pblk->gc;
 143        struct pblk_line *line = line_rq_ws->line;
 144        struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
 145
 146        up(&gc->gc_sem);
 147
 148        if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
 149                pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
 150                                                line->id, *line->vsc,
 151                                                gc_rq->nr_secs);
 152        }
 153
 154        mempool_free(line_rq_ws, pblk->line_ws_pool);
 155}
 156
 157static void pblk_gc_line_prepare_ws(struct work_struct *work)
 158{
 159        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
 160                                                                        ws);
 161        struct pblk *pblk = line_ws->pblk;
 162        struct pblk_line *line = line_ws->line;
 163        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 164        struct pblk_line_meta *lm = &pblk->lm;
 165        struct pblk_gc *gc = &pblk->gc;
 166        struct line_emeta *emeta_buf;
 167        struct pblk_line_ws *line_rq_ws;
 168        struct pblk_gc_rq *gc_rq;
 169        __le64 *lba_list;
 170        int sec_left, nr_secs, bit;
 171        int ret;
 172
 173        emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
 174                                                                GFP_KERNEL);
 175        if (!emeta_buf) {
 176                pr_err("pblk: cannot use GC emeta\n");
 177                return;
 178        }
 179
 180        ret = pblk_line_read_emeta(pblk, line, emeta_buf);
 181        if (ret) {
 182                pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
 183                goto fail_free_emeta;
 184        }
 185
 186        /* If this read fails, it means that emeta is corrupted. For now, leave
 187         * the line untouched. TODO: Implement a recovery routine that scans and
 188         * moves all sectors on the line.
 189         */
 190        lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
 191        if (!lba_list) {
 192                pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
 193                goto fail_free_emeta;
 194        }
 195
 196        sec_left = pblk_line_vsc(line);
 197        if (sec_left < 0) {
 198                pr_err("pblk: corrupted GC line (%d)\n", line->id);
 199                goto fail_free_emeta;
 200        }
 201
 202        bit = -1;
 203next_rq:
 204        gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
 205        if (!gc_rq)
 206                goto fail_free_emeta;
 207
 208        nr_secs = 0;
 209        do {
 210                bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
 211                                                                bit + 1);
 212                if (bit > line->emeta_ssec)
 213                        break;
 214
 215                gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
 216        } while (nr_secs < pblk->max_write_pgs);
 217
 218        if (unlikely(!nr_secs)) {
 219                kfree(gc_rq);
 220                goto out;
 221        }
 222
 223        gc_rq->nr_secs = nr_secs;
 224        gc_rq->line = line;
 225
 226        line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
 227        if (!line_rq_ws)
 228                goto fail_free_gc_rq;
 229
 230        line_rq_ws->pblk = pblk;
 231        line_rq_ws->line = line;
 232        line_rq_ws->priv = gc_rq;
 233
 234        down(&gc->gc_sem);
 235        kref_get(&line->ref);
 236
 237        INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
 238        queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
 239
 240        sec_left -= nr_secs;
 241        if (sec_left > 0)
 242                goto next_rq;
 243
 244out:
 245        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
 246        mempool_free(line_ws, pblk->line_ws_pool);
 247
 248        kref_put(&line->ref, pblk_line_put);
 249        atomic_dec(&gc->inflight_gc);
 250
 251        return;
 252
 253fail_free_gc_rq:
 254        kfree(gc_rq);
 255fail_free_emeta:
 256        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
 257        pblk_put_line_back(pblk, line);
 258        kref_put(&line->ref, pblk_line_put);
 259        mempool_free(line_ws, pblk->line_ws_pool);
 260        atomic_dec(&gc->inflight_gc);
 261
 262        pr_err("pblk: Failed to GC line %d\n", line->id);
 263}
 264
 265static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 266{
 267        struct pblk_gc *gc = &pblk->gc;
 268        struct pblk_line_ws *line_ws;
 269
 270        pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
 271
 272        line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
 273        if (!line_ws)
 274                return -ENOMEM;
 275
 276        line_ws->pblk = pblk;
 277        line_ws->line = line;
 278
 279        INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
 280        queue_work(gc->gc_reader_wq, &line_ws->ws);
 281
 282        return 0;
 283}
 284
 285static int pblk_gc_read(struct pblk *pblk)
 286{
 287        struct pblk_gc *gc = &pblk->gc;
 288        struct pblk_line *line;
 289
 290        spin_lock(&gc->r_lock);
 291        if (list_empty(&gc->r_list)) {
 292                spin_unlock(&gc->r_lock);
 293                return 1;
 294        }
 295
 296        line = list_first_entry(&gc->r_list, struct pblk_line, list);
 297        list_del(&line->list);
 298        spin_unlock(&gc->r_lock);
 299
 300        pblk_gc_kick(pblk);
 301
 302        if (pblk_gc_line(pblk, line))
 303                pr_err("pblk: failed to GC line %d\n", line->id);
 304
 305        return 0;
 306}
 307
 308static void pblk_gc_reader_kick(struct pblk_gc *gc)
 309{
 310        wake_up_process(gc->gc_reader_ts);
 311}
 312
 313static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
 314                                                 struct list_head *group_list)
 315{
 316        struct pblk_line *line, *victim;
 317        int line_vsc, victim_vsc;
 318
 319        victim = list_first_entry(group_list, struct pblk_line, list);
 320        list_for_each_entry(line, group_list, list) {
 321                line_vsc = le32_to_cpu(*line->vsc);
 322                victim_vsc = le32_to_cpu(*victim->vsc);
 323                if (line_vsc < victim_vsc)
 324                        victim = line;
 325        }
 326
 327        return victim;
 328}
 329
 330static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
 331{
 332        unsigned int nr_blocks_free, nr_blocks_need;
 333
 334        nr_blocks_need = pblk_rl_high_thrs(rl);
 335        nr_blocks_free = pblk_rl_nr_free_blks(rl);
 336
 337        /* This is not critical, no need to take lock here */
 338        return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
 339}
 340
 341/*
 342 * Lines with no valid sectors will be returned to the free list immediately. If
 343 * GC is activated - either because the free block count is under the determined
 344 * threshold, or because it is being forced from user space - only lines with a
 345 * high count of invalid sectors will be recycled.
 346 */
 347static void pblk_gc_run(struct pblk *pblk)
 348{
 349        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 350        struct pblk_gc *gc = &pblk->gc;
 351        struct pblk_line *line;
 352        struct list_head *group_list;
 353        bool run_gc;
 354        int inflight_gc, gc_group = 0, prev_group = 0;
 355
 356        do {
 357                spin_lock(&l_mg->gc_lock);
 358                if (list_empty(&l_mg->gc_full_list)) {
 359                        spin_unlock(&l_mg->gc_lock);
 360                        break;
 361                }
 362
 363                line = list_first_entry(&l_mg->gc_full_list,
 364                                                        struct pblk_line, list);
 365
 366                spin_lock(&line->lock);
 367                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 368                line->state = PBLK_LINESTATE_GC;
 369                spin_unlock(&line->lock);
 370
 371                list_del(&line->list);
 372                spin_unlock(&l_mg->gc_lock);
 373
 374                kref_put(&line->ref, pblk_line_put);
 375        } while (1);
 376
 377        run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
 378        if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
 379                return;
 380
 381next_gc_group:
 382        group_list = l_mg->gc_lists[gc_group++];
 383
 384        do {
 385                spin_lock(&l_mg->gc_lock);
 386                if (list_empty(group_list)) {
 387                        spin_unlock(&l_mg->gc_lock);
 388                        break;
 389                }
 390
 391                line = pblk_gc_get_victim_line(pblk, group_list);
 392
 393                spin_lock(&line->lock);
 394                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 395                line->state = PBLK_LINESTATE_GC;
 396                spin_unlock(&line->lock);
 397
 398                list_del(&line->list);
 399                spin_unlock(&l_mg->gc_lock);
 400
 401                spin_lock(&gc->r_lock);
 402                list_add_tail(&line->list, &gc->r_list);
 403                spin_unlock(&gc->r_lock);
 404
 405                inflight_gc = atomic_inc_return(&gc->inflight_gc);
 406                pblk_gc_reader_kick(gc);
 407
 408                prev_group = 1;
 409
 410                /* No need to queue up more GC lines than we can handle */
 411                run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
 412                if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
 413                        break;
 414        } while (1);
 415
 416        if (!prev_group && pblk->rl.rb_state > gc_group &&
 417                                                gc_group < PBLK_GC_NR_LISTS)
 418                goto next_gc_group;
 419}
 420
 421void pblk_gc_kick(struct pblk *pblk)
 422{
 423        struct pblk_gc *gc = &pblk->gc;
 424
 425        wake_up_process(gc->gc_ts);
 426        pblk_gc_writer_kick(gc);
 427        pblk_gc_reader_kick(gc);
 428        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 429}
 430
 431static void pblk_gc_timer(unsigned long data)
 432{
 433        struct pblk *pblk = (struct pblk *)data;
 434
 435        pblk_gc_kick(pblk);
 436}
 437
 438static int pblk_gc_ts(void *data)
 439{
 440        struct pblk *pblk = data;
 441
 442        while (!kthread_should_stop()) {
 443                pblk_gc_run(pblk);
 444                set_current_state(TASK_INTERRUPTIBLE);
 445                io_schedule();
 446        }
 447
 448        return 0;
 449}
 450
 451static int pblk_gc_writer_ts(void *data)
 452{
 453        struct pblk *pblk = data;
 454
 455        while (!kthread_should_stop()) {
 456                if (!pblk_gc_write(pblk))
 457                        continue;
 458                set_current_state(TASK_INTERRUPTIBLE);
 459                io_schedule();
 460        }
 461
 462        return 0;
 463}
 464
 465static int pblk_gc_reader_ts(void *data)
 466{
 467        struct pblk *pblk = data;
 468
 469        while (!kthread_should_stop()) {
 470                if (!pblk_gc_read(pblk))
 471                        continue;
 472                set_current_state(TASK_INTERRUPTIBLE);
 473                io_schedule();
 474        }
 475
 476        return 0;
 477}
 478
 479static void pblk_gc_start(struct pblk *pblk)
 480{
 481        pblk->gc.gc_active = 1;
 482        pr_debug("pblk: gc start\n");
 483}
 484
 485void pblk_gc_should_start(struct pblk *pblk)
 486{
 487        struct pblk_gc *gc = &pblk->gc;
 488
 489        if (gc->gc_enabled && !gc->gc_active)
 490                pblk_gc_start(pblk);
 491
 492        pblk_gc_kick(pblk);
 493}
 494
 495/*
 496 * If flush_wq == 1 then no lock should be held by the caller since
 497 * flush_workqueue can sleep
 498 */
 499static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
 500{
 501        pblk->gc.gc_active = 0;
 502        pr_debug("pblk: gc stop\n");
 503}
 504
 505void pblk_gc_should_stop(struct pblk *pblk)
 506{
 507        struct pblk_gc *gc = &pblk->gc;
 508
 509        if (gc->gc_active && !gc->gc_forced)
 510                pblk_gc_stop(pblk, 0);
 511}
 512
 513void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 514                              int *gc_active)
 515{
 516        struct pblk_gc *gc = &pblk->gc;
 517
 518        spin_lock(&gc->lock);
 519        *gc_enabled = gc->gc_enabled;
 520        *gc_active = gc->gc_active;
 521        spin_unlock(&gc->lock);
 522}
 523
 524int pblk_gc_sysfs_force(struct pblk *pblk, int force)
 525{
 526        struct pblk_gc *gc = &pblk->gc;
 527
 528        if (force < 0 || force > 1)
 529                return -EINVAL;
 530
 531        spin_lock(&gc->lock);
 532        gc->gc_forced = force;
 533
 534        if (force)
 535                gc->gc_enabled = 1;
 536        else
 537                gc->gc_enabled = 0;
 538        spin_unlock(&gc->lock);
 539
 540        pblk_gc_should_start(pblk);
 541
 542        return 0;
 543}
 544
 545int pblk_gc_init(struct pblk *pblk)
 546{
 547        struct pblk_gc *gc = &pblk->gc;
 548        int ret;
 549
 550        gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
 551        if (IS_ERR(gc->gc_ts)) {
 552                pr_err("pblk: could not allocate GC main kthread\n");
 553                return PTR_ERR(gc->gc_ts);
 554        }
 555
 556        gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
 557                                                        "pblk-gc-writer-ts");
 558        if (IS_ERR(gc->gc_writer_ts)) {
 559                pr_err("pblk: could not allocate GC writer kthread\n");
 560                ret = PTR_ERR(gc->gc_writer_ts);
 561                goto fail_free_main_kthread;
 562        }
 563
 564        gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
 565                                                        "pblk-gc-reader-ts");
 566        if (IS_ERR(gc->gc_reader_ts)) {
 567                pr_err("pblk: could not allocate GC reader kthread\n");
 568                ret = PTR_ERR(gc->gc_reader_ts);
 569                goto fail_free_writer_kthread;
 570        }
 571
 572        setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
 573        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 574
 575        gc->gc_active = 0;
 576        gc->gc_forced = 0;
 577        gc->gc_enabled = 1;
 578        gc->w_entries = 0;
 579        atomic_set(&gc->inflight_gc, 0);
 580
 581        /* Workqueue that reads valid sectors from a line and submit them to the
 582         * GC writer to be recycled.
 583         */
 584        gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
 585                        WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
 586        if (!gc->gc_line_reader_wq) {
 587                pr_err("pblk: could not allocate GC line reader workqueue\n");
 588                ret = -ENOMEM;
 589                goto fail_free_reader_kthread;
 590        }
 591
 592        /* Workqueue that prepare lines for GC */
 593        gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
 594                                        WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 595        if (!gc->gc_reader_wq) {
 596                pr_err("pblk: could not allocate GC reader workqueue\n");
 597                ret = -ENOMEM;
 598                goto fail_free_reader_line_wq;
 599        }
 600
 601        spin_lock_init(&gc->lock);
 602        spin_lock_init(&gc->w_lock);
 603        spin_lock_init(&gc->r_lock);
 604
 605        sema_init(&gc->gc_sem, 128);
 606
 607        INIT_LIST_HEAD(&gc->w_list);
 608        INIT_LIST_HEAD(&gc->r_list);
 609
 610        return 0;
 611
 612fail_free_reader_line_wq:
 613        destroy_workqueue(gc->gc_line_reader_wq);
 614fail_free_reader_kthread:
 615        kthread_stop(gc->gc_reader_ts);
 616fail_free_writer_kthread:
 617        kthread_stop(gc->gc_writer_ts);
 618fail_free_main_kthread:
 619        kthread_stop(gc->gc_ts);
 620
 621        return ret;
 622}
 623
 624void pblk_gc_exit(struct pblk *pblk)
 625{
 626        struct pblk_gc *gc = &pblk->gc;
 627
 628        flush_workqueue(gc->gc_reader_wq);
 629        flush_workqueue(gc->gc_line_reader_wq);
 630
 631        del_timer(&gc->gc_timer);
 632        pblk_gc_stop(pblk, 1);
 633
 634        if (gc->gc_ts)
 635                kthread_stop(gc->gc_ts);
 636
 637        if (gc->gc_reader_wq)
 638                destroy_workqueue(gc->gc_reader_wq);
 639
 640        if (gc->gc_line_reader_wq)
 641                destroy_workqueue(gc->gc_line_reader_wq);
 642
 643        if (gc->gc_writer_ts)
 644                kthread_stop(gc->gc_writer_ts);
 645
 646        if (gc->gc_reader_ts)
 647                kthread_stop(gc->gc_reader_ts);
 648}
 649