linux/drivers/block/zram/zram_drv.c
   1/*
   2 * Compressed RAM block device
   3 *
   4 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
   5 *               2012, 2013 Minchan Kim
   6 *
   7 * This code is released using a dual license strategy: BSD/GPL
   8 * You can choose the license that best fits your requirements.
   9 *
  10 * Released under the terms of 3-clause BSD License
  11 * Released under the terms of GNU General Public License Version 2.0
  12 *
  13 */
  14
  15#define KMSG_COMPONENT "zram"
  16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  17
  18#include <linux/module.h>
  19#include <linux/kernel.h>
  20#include <linux/bio.h>
  21#include <linux/bitops.h>
  22#include <linux/blkdev.h>
  23#include <linux/buffer_head.h>
  24#include <linux/device.h>
  25#include <linux/genhd.h>
  26#include <linux/highmem.h>
  27#include <linux/slab.h>
  28#include <linux/backing-dev.h>
  29#include <linux/string.h>
  30#include <linux/vmalloc.h>
  31#include <linux/err.h>
  32#include <linux/idr.h>
  33#include <linux/sysfs.h>
  34#include <linux/debugfs.h>
  35#include <linux/cpuhotplug.h>
  36#include <linux/part_stat.h>
  37
  38#include "zram_drv.h"
  39
  40static DEFINE_IDR(zram_index_idr);
  41/* idr index must be protected */
  42static DEFINE_MUTEX(zram_index_mutex);
  43
  44static int zram_major;
  45static const char *default_compressor = "lzo-rle";
  46
  47/* Module params (documentation at end) */
  48static unsigned int num_devices = 1;
  49/*
   50 * Pages that compress to a size equal to or greater than this are
   51 * stored uncompressed in memory.
  52 */
  53static size_t huge_class_size;
  54
  55static void zram_free_page(struct zram *zram, size_t index);
  56static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  57                                u32 index, int offset, struct bio *bio);
  58
  59
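/*
 * Each table entry is protected by a per-slot bit spinlock: the ZRAM_LOCK
 * bit of that entry's flags word. The three helpers below acquire, try to
 * acquire and release that lock; all per-slot flag and size updates are
 * expected to run with it held.
 */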
  60static int zram_slot_trylock(struct zram *zram, u32 index)
  61{
  62        return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
  63}
  64
  65static void zram_slot_lock(struct zram *zram, u32 index)
  66{
  67        bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
  68}
  69
  70static void zram_slot_unlock(struct zram *zram, u32 index)
  71{
  72        bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
  73}
  74
  75static inline bool init_done(struct zram *zram)
  76{
  77        return zram->disksize;
  78}
  79
  80static inline struct zram *dev_to_zram(struct device *dev)
  81{
  82        return (struct zram *)dev_to_disk(dev)->private_data;
  83}
  84
  85static unsigned long zram_get_handle(struct zram *zram, u32 index)
  86{
  87        return zram->table[index].handle;
  88}
  89
  90static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
  91{
  92        zram->table[index].handle = handle;
  93}
  94
   95/* flag operations require the table entry's bit_spin_lock() to be held */
  96static bool zram_test_flag(struct zram *zram, u32 index,
  97                        enum zram_pageflags flag)
  98{
  99        return zram->table[index].flags & BIT(flag);
 100}
 101
 102static void zram_set_flag(struct zram *zram, u32 index,
 103                        enum zram_pageflags flag)
 104{
 105        zram->table[index].flags |= BIT(flag);
 106}
 107
 108static void zram_clear_flag(struct zram *zram, u32 index,
 109                        enum zram_pageflags flag)
 110{
 111        zram->table[index].flags &= ~BIT(flag);
 112}
 113
 114static inline void zram_set_element(struct zram *zram, u32 index,
 115                        unsigned long element)
 116{
 117        zram->table[index].element = element;
 118}
 119
 120static unsigned long zram_get_element(struct zram *zram, u32 index)
 121{
 122        return zram->table[index].element;
 123}
 124
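/*
 * The low ZRAM_FLAG_SHIFT bits of each entry's flags word hold the size of
 * the compressed object; the bits above them hold the zram_pageflags. The
 * two helpers below unpack and repack those fields.
 */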
 125static size_t zram_get_obj_size(struct zram *zram, u32 index)
 126{
 127        return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
 128}
 129
 130static void zram_set_obj_size(struct zram *zram,
 131                                        u32 index, size_t size)
 132{
 133        unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
 134
 135        zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
 136}
 137
 138static inline bool zram_allocated(struct zram *zram, u32 index)
 139{
 140        return zram_get_obj_size(zram, index) ||
 141                        zram_test_flag(zram, index, ZRAM_SAME) ||
 142                        zram_test_flag(zram, index, ZRAM_WB);
 143}
 144
 145#if PAGE_SIZE != 4096
 146static inline bool is_partial_io(struct bio_vec *bvec)
 147{
 148        return bvec->bv_len != PAGE_SIZE;
 149}
 150#else
 151static inline bool is_partial_io(struct bio_vec *bvec)
 152{
 153        return false;
 154}
 155#endif
 156
 157/*
 158 * Check if request is within bounds and aligned on zram logical blocks.
 159 */
 160static inline bool valid_io_request(struct zram *zram,
 161                sector_t start, unsigned int size)
 162{
 163        u64 end, bound;
 164
 165        /* unaligned request */
 166        if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
 167                return false;
 168        if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
 169                return false;
 170
 171        end = start + (size >> SECTOR_SHIFT);
 172        bound = zram->disksize >> SECTOR_SHIFT;
  173        /* out of range */
 174        if (unlikely(start >= bound || end > bound || start > end))
 175                return false;
 176
 177        /* I/O request is valid */
 178        return true;
 179}
 180
 181static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
 182{
 183        *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
 184        *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
 185}
 186
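/*
 * Lock-free update of the max_used_pages watermark: keep retrying
 * atomic_long_cmpxchg() until either the recorded maximum is already at
 * least @pages or this caller's value has been stored.
 */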
 187static inline void update_used_max(struct zram *zram,
 188                                        const unsigned long pages)
 189{
 190        unsigned long old_max, cur_max;
 191
 192        old_max = atomic_long_read(&zram->stats.max_used_pages);
 193
 194        do {
 195                cur_max = old_max;
 196                if (pages > cur_max)
 197                        old_max = atomic_long_cmpxchg(
 198                                &zram->stats.max_used_pages, cur_max, pages);
 199        } while (old_max != cur_max);
 200}
 201
 202static inline void zram_fill_page(void *ptr, unsigned long len,
 203                                        unsigned long value)
 204{
 205        WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
 206        memset_l(ptr, value, len / sizeof(unsigned long));
 207}
 208
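/*
 * Detect pages in which every machine word holds the same value. Such pages
 * are not compressed at all: only the repeated word is kept (ZRAM_SAME) and
 * the page is regenerated by zram_fill_page() on read. The last word is
 * compared first as a cheap early reject.
 */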
 209static bool page_same_filled(void *ptr, unsigned long *element)
 210{
 211        unsigned long *page;
 212        unsigned long val;
 213        unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
 214
 215        page = (unsigned long *)ptr;
 216        val = page[0];
 217
 218        if (val != page[last_pos])
 219                return false;
 220
 221        for (pos = 1; pos < last_pos; pos++) {
 222                if (val != page[pos])
 223                        return false;
 224        }
 225
 226        *element = val;
 227
 228        return true;
 229}
 230
 231static ssize_t initstate_show(struct device *dev,
 232                struct device_attribute *attr, char *buf)
 233{
 234        u32 val;
 235        struct zram *zram = dev_to_zram(dev);
 236
 237        down_read(&zram->init_lock);
 238        val = init_done(zram);
 239        up_read(&zram->init_lock);
 240
 241        return scnprintf(buf, PAGE_SIZE, "%u\n", val);
 242}
 243
 244static ssize_t disksize_show(struct device *dev,
 245                struct device_attribute *attr, char *buf)
 246{
 247        struct zram *zram = dev_to_zram(dev);
 248
 249        return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
 250}
 251
 252static ssize_t mem_limit_store(struct device *dev,
 253                struct device_attribute *attr, const char *buf, size_t len)
 254{
 255        u64 limit;
 256        char *tmp;
 257        struct zram *zram = dev_to_zram(dev);
 258
 259        limit = memparse(buf, &tmp);
 260        if (buf == tmp) /* no chars parsed, invalid input */
 261                return -EINVAL;
 262
 263        down_write(&zram->init_lock);
 264        zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
 265        up_write(&zram->init_lock);
 266
 267        return len;
 268}
 269
 270static ssize_t mem_used_max_store(struct device *dev,
 271                struct device_attribute *attr, const char *buf, size_t len)
 272{
 273        int err;
 274        unsigned long val;
 275        struct zram *zram = dev_to_zram(dev);
 276
 277        err = kstrtoul(buf, 10, &val);
 278        if (err || val != 0)
 279                return -EINVAL;
 280
 281        down_read(&zram->init_lock);
 282        if (init_done(zram)) {
 283                atomic_long_set(&zram->stats.max_used_pages,
 284                                zs_get_total_pages(zram->mem_pool));
 285        }
 286        up_read(&zram->init_lock);
 287
 288        return len;
 289}
 290
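/*
 * "echo all > /sys/block/zramX/idle" marks every allocated slot as
 * ZRAM_IDLE; a later "echo idle > /sys/block/zramX/writeback" can then
 * write those slots out to the backing device. See
 * Documentation/admin-guide/blockdev/zram.rst.
 */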
 291static ssize_t idle_store(struct device *dev,
 292                struct device_attribute *attr, const char *buf, size_t len)
 293{
 294        struct zram *zram = dev_to_zram(dev);
 295        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 296        int index;
 297
 298        if (!sysfs_streq(buf, "all"))
 299                return -EINVAL;
 300
 301        down_read(&zram->init_lock);
 302        if (!init_done(zram)) {
 303                up_read(&zram->init_lock);
 304                return -EINVAL;
 305        }
 306
 307        for (index = 0; index < nr_pages; index++) {
 308                /*
  309                 * Do not mark a ZRAM_UNDER_WB slot as ZRAM_IDLE, to close the race.
 310                 * See the comment in writeback_store.
 311                 */
 312                zram_slot_lock(zram, index);
 313                if (zram_allocated(zram, index) &&
 314                                !zram_test_flag(zram, index, ZRAM_UNDER_WB))
 315                        zram_set_flag(zram, index, ZRAM_IDLE);
 316                zram_slot_unlock(zram, index);
 317        }
 318
 319        up_read(&zram->init_lock);
 320
 321        return len;
 322}
 323
 324#ifdef CONFIG_ZRAM_WRITEBACK
 325static ssize_t writeback_limit_enable_store(struct device *dev,
 326                struct device_attribute *attr, const char *buf, size_t len)
 327{
 328        struct zram *zram = dev_to_zram(dev);
 329        u64 val;
 330        ssize_t ret = -EINVAL;
 331
 332        if (kstrtoull(buf, 10, &val))
 333                return ret;
 334
 335        down_read(&zram->init_lock);
 336        spin_lock(&zram->wb_limit_lock);
 337        zram->wb_limit_enable = val;
 338        spin_unlock(&zram->wb_limit_lock);
 339        up_read(&zram->init_lock);
 340        ret = len;
 341
 342        return ret;
 343}
 344
 345static ssize_t writeback_limit_enable_show(struct device *dev,
 346                struct device_attribute *attr, char *buf)
 347{
 348        bool val;
 349        struct zram *zram = dev_to_zram(dev);
 350
 351        down_read(&zram->init_lock);
 352        spin_lock(&zram->wb_limit_lock);
 353        val = zram->wb_limit_enable;
 354        spin_unlock(&zram->wb_limit_lock);
 355        up_read(&zram->init_lock);
 356
 357        return scnprintf(buf, PAGE_SIZE, "%d\n", val);
 358}
 359
 360static ssize_t writeback_limit_store(struct device *dev,
 361                struct device_attribute *attr, const char *buf, size_t len)
 362{
 363        struct zram *zram = dev_to_zram(dev);
 364        u64 val;
 365        ssize_t ret = -EINVAL;
 366
 367        if (kstrtoull(buf, 10, &val))
 368                return ret;
 369
 370        down_read(&zram->init_lock);
 371        spin_lock(&zram->wb_limit_lock);
 372        zram->bd_wb_limit = val;
 373        spin_unlock(&zram->wb_limit_lock);
 374        up_read(&zram->init_lock);
 375        ret = len;
 376
 377        return ret;
 378}
 379
 380static ssize_t writeback_limit_show(struct device *dev,
 381                struct device_attribute *attr, char *buf)
 382{
 383        u64 val;
 384        struct zram *zram = dev_to_zram(dev);
 385
 386        down_read(&zram->init_lock);
 387        spin_lock(&zram->wb_limit_lock);
 388        val = zram->bd_wb_limit;
 389        spin_unlock(&zram->wb_limit_lock);
 390        up_read(&zram->init_lock);
 391
 392        return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
 393}
 394
 395static void reset_bdev(struct zram *zram)
 396{
 397        struct block_device *bdev;
 398
 399        if (!zram->backing_dev)
 400                return;
 401
 402        bdev = zram->bdev;
 403        if (zram->old_block_size)
 404                set_blocksize(bdev, zram->old_block_size);
 405        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  406        /* hope filp_close flushes all of the IO */
 407        filp_close(zram->backing_dev, NULL);
 408        zram->backing_dev = NULL;
 409        zram->old_block_size = 0;
 410        zram->bdev = NULL;
 411        zram->disk->queue->backing_dev_info->capabilities |=
 412                                BDI_CAP_SYNCHRONOUS_IO;
 413        kvfree(zram->bitmap);
 414        zram->bitmap = NULL;
 415}
 416
 417static ssize_t backing_dev_show(struct device *dev,
 418                struct device_attribute *attr, char *buf)
 419{
 420        struct file *file;
 421        struct zram *zram = dev_to_zram(dev);
 422        char *p;
 423        ssize_t ret;
 424
 425        down_read(&zram->init_lock);
 426        file = zram->backing_dev;
 427        if (!file) {
 428                memcpy(buf, "none\n", 5);
 429                up_read(&zram->init_lock);
 430                return 5;
 431        }
 432
 433        p = file_path(file, buf, PAGE_SIZE - 1);
 434        if (IS_ERR(p)) {
 435                ret = PTR_ERR(p);
 436                goto out;
 437        }
 438
 439        ret = strlen(p);
 440        memmove(buf, p, ret);
 441        buf[ret++] = '\n';
 442out:
 443        up_read(&zram->init_lock);
 444        return ret;
 445}
 446
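/*
 * Attach a backing block device for writeback: resolve the user-supplied
 * path, claim the block device exclusively, allocate a bitmap with one bit
 * per PAGE_SIZE block of that device, and publish everything under
 * init_lock. Only allowed while the zram device is not yet initialized.
 */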
 447static ssize_t backing_dev_store(struct device *dev,
 448                struct device_attribute *attr, const char *buf, size_t len)
 449{
 450        char *file_name;
 451        size_t sz;
 452        struct file *backing_dev = NULL;
 453        struct inode *inode;
 454        struct address_space *mapping;
 455        unsigned int bitmap_sz, old_block_size = 0;
 456        unsigned long nr_pages, *bitmap = NULL;
 457        struct block_device *bdev = NULL;
 458        int err;
 459        struct zram *zram = dev_to_zram(dev);
 460
 461        file_name = kmalloc(PATH_MAX, GFP_KERNEL);
 462        if (!file_name)
 463                return -ENOMEM;
 464
 465        down_write(&zram->init_lock);
 466        if (init_done(zram)) {
 467                pr_info("Can't setup backing device for initialized device\n");
 468                err = -EBUSY;
 469                goto out;
 470        }
 471
 472        strlcpy(file_name, buf, PATH_MAX);
 473        /* ignore trailing newline */
 474        sz = strlen(file_name);
 475        if (sz > 0 && file_name[sz - 1] == '\n')
 476                file_name[sz - 1] = 0x00;
 477
 478        backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
 479        if (IS_ERR(backing_dev)) {
 480                err = PTR_ERR(backing_dev);
 481                backing_dev = NULL;
 482                goto out;
 483        }
 484
 485        mapping = backing_dev->f_mapping;
 486        inode = mapping->host;
 487
  488        /* Only block devices are supported at the moment */
 489        if (!S_ISBLK(inode->i_mode)) {
 490                err = -ENOTBLK;
 491                goto out;
 492        }
 493
 494        bdev = bdgrab(I_BDEV(inode));
 495        err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
 496        if (err < 0) {
 497                bdev = NULL;
 498                goto out;
 499        }
 500
 501        nr_pages = i_size_read(inode) >> PAGE_SHIFT;
 502        bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
 503        bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
 504        if (!bitmap) {
 505                err = -ENOMEM;
 506                goto out;
 507        }
 508
 509        old_block_size = block_size(bdev);
 510        err = set_blocksize(bdev, PAGE_SIZE);
 511        if (err)
 512                goto out;
 513
 514        reset_bdev(zram);
 515
 516        zram->old_block_size = old_block_size;
 517        zram->bdev = bdev;
 518        zram->backing_dev = backing_dev;
 519        zram->bitmap = bitmap;
 520        zram->nr_pages = nr_pages;
 521        /*
  522         * With the writeback feature, zram does asynchronous IO, so it is no
  523         * longer a synchronous device; drop the synchronous io flag. Otherwise,
  524         * the upper layer (e.g., swap) could wait for IO completion rather
  525         * than submitting and returning, which would make the system sluggish.
  526         * Furthermore, when the IO function returns (e.g., swap_readpage), the
  527         * upper layer expects the IO to be done and may free the page, while
  528         * in fact the IO is still in flight, which would eventually cause a
  529         * use-after-free when the IO really completes.
 530         */
 531        zram->disk->queue->backing_dev_info->capabilities &=
 532                        ~BDI_CAP_SYNCHRONOUS_IO;
 533        up_write(&zram->init_lock);
 534
 535        pr_info("setup backing device %s\n", file_name);
 536        kfree(file_name);
 537
 538        return len;
 539out:
 540        if (bitmap)
 541                kvfree(bitmap);
 542
 543        if (bdev)
 544                blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 545
 546        if (backing_dev)
 547                filp_close(backing_dev, NULL);
 548
 549        up_write(&zram->init_lock);
 550
 551        kfree(file_name);
 552
 553        return err;
 554}
 555
 556static unsigned long alloc_block_bdev(struct zram *zram)
 557{
 558        unsigned long blk_idx = 1;
 559retry:
  560        /* skip bit 0 so a block index is never confused with zram.handle == 0 */
 561        blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
 562        if (blk_idx == zram->nr_pages)
 563                return 0;
 564
 565        if (test_and_set_bit(blk_idx, zram->bitmap))
 566                goto retry;
 567
 568        atomic64_inc(&zram->stats.bd_count);
 569        return blk_idx;
 570}
 571
 572static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
 573{
 574        int was_set;
 575
 576        was_set = test_and_clear_bit(blk_idx, zram->bitmap);
 577        WARN_ON_ONCE(!was_set);
 578        atomic64_dec(&zram->stats.bd_count);
 579}
 580
 581static void zram_page_end_io(struct bio *bio)
 582{
 583        struct page *page = bio_first_page_all(bio);
 584
 585        page_endio(page, op_is_write(bio_op(bio)),
 586                        blk_status_to_errno(bio->bi_status));
 587        bio_put(bio);
 588}
 589
 590/*
 591 * Returns 1 if the submission is successful.
 592 */
 593static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 594                        unsigned long entry, struct bio *parent)
 595{
 596        struct bio *bio;
 597
 598        bio = bio_alloc(GFP_ATOMIC, 1);
 599        if (!bio)
 600                return -ENOMEM;
 601
 602        bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 603        bio_set_dev(bio, zram->bdev);
 604        if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
 605                bio_put(bio);
 606                return -EIO;
 607        }
 608
 609        if (!parent) {
 610                bio->bi_opf = REQ_OP_READ;
 611                bio->bi_end_io = zram_page_end_io;
 612        } else {
 613                bio->bi_opf = parent->bi_opf;
 614                bio_chain(bio, parent);
 615        }
 616
 617        submit_bio(bio);
 618        return 1;
 619}
 620
 621#define HUGE_WRITEBACK 1
 622#define IDLE_WRITEBACK 2
 623
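/*
 * "echo idle > /sys/block/zramX/writeback" (or "echo huge > ...") walks the
 * whole table and writes matching slots to the backing device one page at a
 * time: a slot is marked ZRAM_UNDER_WB/ZRAM_IDLE while its data is read and
 * a synchronous bio is submitted, then re-checked under the slot lock and
 * converted to ZRAM_WB pointing at the backing-device block on success.
 */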
 624static ssize_t writeback_store(struct device *dev,
 625                struct device_attribute *attr, const char *buf, size_t len)
 626{
 627        struct zram *zram = dev_to_zram(dev);
 628        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 629        unsigned long index;
 630        struct bio bio;
 631        struct bio_vec bio_vec;
 632        struct page *page;
 633        ssize_t ret = len;
 634        int mode;
 635        unsigned long blk_idx = 0;
 636
 637        if (sysfs_streq(buf, "idle"))
 638                mode = IDLE_WRITEBACK;
 639        else if (sysfs_streq(buf, "huge"))
 640                mode = HUGE_WRITEBACK;
 641        else
 642                return -EINVAL;
 643
 644        down_read(&zram->init_lock);
 645        if (!init_done(zram)) {
 646                ret = -EINVAL;
 647                goto release_init_lock;
 648        }
 649
 650        if (!zram->backing_dev) {
 651                ret = -ENODEV;
 652                goto release_init_lock;
 653        }
 654
 655        page = alloc_page(GFP_KERNEL);
 656        if (!page) {
 657                ret = -ENOMEM;
 658                goto release_init_lock;
 659        }
 660
 661        for (index = 0; index < nr_pages; index++) {
 662                struct bio_vec bvec;
 663
 664                bvec.bv_page = page;
 665                bvec.bv_len = PAGE_SIZE;
 666                bvec.bv_offset = 0;
 667
 668                spin_lock(&zram->wb_limit_lock);
 669                if (zram->wb_limit_enable && !zram->bd_wb_limit) {
 670                        spin_unlock(&zram->wb_limit_lock);
 671                        ret = -EIO;
 672                        break;
 673                }
 674                spin_unlock(&zram->wb_limit_lock);
 675
 676                if (!blk_idx) {
 677                        blk_idx = alloc_block_bdev(zram);
 678                        if (!blk_idx) {
 679                                ret = -ENOSPC;
 680                                break;
 681                        }
 682                }
 683
 684                zram_slot_lock(zram, index);
 685                if (!zram_allocated(zram, index))
 686                        goto next;
 687
 688                if (zram_test_flag(zram, index, ZRAM_WB) ||
 689                                zram_test_flag(zram, index, ZRAM_SAME) ||
 690                                zram_test_flag(zram, index, ZRAM_UNDER_WB))
 691                        goto next;
 692
 693                if (mode == IDLE_WRITEBACK &&
 694                          !zram_test_flag(zram, index, ZRAM_IDLE))
 695                        goto next;
 696                if (mode == HUGE_WRITEBACK &&
 697                          !zram_test_flag(zram, index, ZRAM_HUGE))
 698                        goto next;
 699                /*
  700                 * Clearing ZRAM_UNDER_WB is the duty of the caller.
  701                 * IOW, zram_free_page never clears it.
 702                 */
 703                zram_set_flag(zram, index, ZRAM_UNDER_WB);
  704                /* Needed to detect races during hugepage writeback */
 705                zram_set_flag(zram, index, ZRAM_IDLE);
 706                zram_slot_unlock(zram, index);
 707                if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
 708                        zram_slot_lock(zram, index);
 709                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 710                        zram_clear_flag(zram, index, ZRAM_IDLE);
 711                        zram_slot_unlock(zram, index);
 712                        continue;
 713                }
 714
 715                bio_init(&bio, &bio_vec, 1);
 716                bio_set_dev(&bio, zram->bdev);
 717                bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
 718                bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
 719
 720                bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
 721                                bvec.bv_offset);
 722                /*
 723                 * XXX: A single page IO would be inefficient for write
  724                 * but it is not a bad starting point.
 725                 */
 726                ret = submit_bio_wait(&bio);
 727                if (ret) {
 728                        zram_slot_lock(zram, index);
 729                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 730                        zram_clear_flag(zram, index, ZRAM_IDLE);
 731                        zram_slot_unlock(zram, index);
 732                        continue;
 733                }
 734
 735                atomic64_inc(&zram->stats.bd_writes);
 736                /*
  737                 * We released zram_slot_lock, so we need to check whether the
  738                 * slot has changed. If the slot was freed, we can catch that
  739                 * easily with zram_allocated.
  740                 * A subtle case is the slot being freed/reallocated/marked as
  741                 * ZRAM_IDLE again. To close that race, idle_store does not
  742                 * mark a slot ZRAM_IDLE once it finds it is ZRAM_UNDER_WB.
  743                 * Thus, we can close the race by checking the ZRAM_IDLE bit.
 744                 */
 745                zram_slot_lock(zram, index);
 746                if (!zram_allocated(zram, index) ||
 747                          !zram_test_flag(zram, index, ZRAM_IDLE)) {
 748                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 749                        zram_clear_flag(zram, index, ZRAM_IDLE);
 750                        goto next;
 751                }
 752
 753                zram_free_page(zram, index);
 754                zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 755                zram_set_flag(zram, index, ZRAM_WB);
 756                zram_set_element(zram, index, blk_idx);
 757                blk_idx = 0;
 758                atomic64_inc(&zram->stats.pages_stored);
 759                spin_lock(&zram->wb_limit_lock);
 760                if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
 761                        zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
 762                spin_unlock(&zram->wb_limit_lock);
 763next:
 764                zram_slot_unlock(zram, index);
 765        }
 766
 767        if (blk_idx)
 768                free_block_bdev(zram, blk_idx);
 769        __free_page(page);
 770release_init_lock:
 771        up_read(&zram->init_lock);
 772
 773        return ret;
 774}
 775
 776struct zram_work {
 777        struct work_struct work;
 778        struct zram *zram;
 779        unsigned long entry;
 780        struct bio *bio;
 781        struct bio_vec bvec;
 782};
 783
 784#if PAGE_SIZE != 4096
 785static void zram_sync_read(struct work_struct *work)
 786{
 787        struct zram_work *zw = container_of(work, struct zram_work, work);
 788        struct zram *zram = zw->zram;
 789        unsigned long entry = zw->entry;
 790        struct bio *bio = zw->bio;
 791
 792        read_from_bdev_async(zram, &zw->bvec, entry, bio);
 793}
 794
 795/*
  796 * The block layer wants one ->submit_bio to be active at a time, so if we
  797 * use chained IO with the parent IO in the same context, we get a deadlock.
  798 * To avoid that, use a worker thread context.
 799 */
 800static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 801                                unsigned long entry, struct bio *bio)
 802{
 803        struct zram_work work;
 804
 805        work.bvec = *bvec;
 806        work.zram = zram;
 807        work.entry = entry;
 808        work.bio = bio;
 809
 810        INIT_WORK_ONSTACK(&work.work, zram_sync_read);
 811        queue_work(system_unbound_wq, &work.work);
 812        flush_work(&work.work);
 813        destroy_work_on_stack(&work.work);
 814
 815        return 1;
 816}
 817#else
 818static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 819                                unsigned long entry, struct bio *bio)
 820{
 821        WARN_ON(1);
 822        return -EIO;
 823}
 824#endif
 825
 826static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 827                        unsigned long entry, struct bio *parent, bool sync)
 828{
 829        atomic64_inc(&zram->stats.bd_reads);
 830        if (sync)
 831                return read_from_bdev_sync(zram, bvec, entry, parent);
 832        else
 833                return read_from_bdev_async(zram, bvec, entry, parent);
 834}
 835#else
 836static inline void reset_bdev(struct zram *zram) {};
 837static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 838                        unsigned long entry, struct bio *parent, bool sync)
 839{
 840        return -EIO;
 841}
 842
 843static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
 844#endif
 845
 846#ifdef CONFIG_ZRAM_MEMORY_TRACKING
 847
 848static struct dentry *zram_debugfs_root;
 849
 850static void zram_debugfs_create(void)
 851{
 852        zram_debugfs_root = debugfs_create_dir("zram", NULL);
 853}
 854
 855static void zram_debugfs_destroy(void)
 856{
 857        debugfs_remove_recursive(zram_debugfs_root);
 858}
 859
 860static void zram_accessed(struct zram *zram, u32 index)
 861{
 862        zram_clear_flag(zram, index, ZRAM_IDLE);
 863        zram->table[index].ac_time = ktime_get_boottime();
 864}
 865
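/*
 * debugfs block_state dump: one line per allocated slot with the slot index,
 * the last access time and the ZRAM_SAME/WB/HUGE/IDLE flags encoded as the
 * characters 's', 'w', 'h' and 'i' ('.' when clear).
 */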
 866static ssize_t read_block_state(struct file *file, char __user *buf,
 867                                size_t count, loff_t *ppos)
 868{
 869        char *kbuf;
 870        ssize_t index, written = 0;
 871        struct zram *zram = file->private_data;
 872        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 873        struct timespec64 ts;
 874
 875        kbuf = kvmalloc(count, GFP_KERNEL);
 876        if (!kbuf)
 877                return -ENOMEM;
 878
 879        down_read(&zram->init_lock);
 880        if (!init_done(zram)) {
 881                up_read(&zram->init_lock);
 882                kvfree(kbuf);
 883                return -EINVAL;
 884        }
 885
 886        for (index = *ppos; index < nr_pages; index++) {
 887                int copied;
 888
 889                zram_slot_lock(zram, index);
 890                if (!zram_allocated(zram, index))
 891                        goto next;
 892
 893                ts = ktime_to_timespec64(zram->table[index].ac_time);
 894                copied = snprintf(kbuf + written, count,
 895                        "%12zd %12lld.%06lu %c%c%c%c\n",
 896                        index, (s64)ts.tv_sec,
 897                        ts.tv_nsec / NSEC_PER_USEC,
 898                        zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 899                        zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
 900                        zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
 901                        zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
 902
 903                if (count < copied) {
 904                        zram_slot_unlock(zram, index);
 905                        break;
 906                }
 907                written += copied;
 908                count -= copied;
 909next:
 910                zram_slot_unlock(zram, index);
 911                *ppos += 1;
 912        }
 913
 914        up_read(&zram->init_lock);
 915        if (copy_to_user(buf, kbuf, written))
 916                written = -EFAULT;
 917        kvfree(kbuf);
 918
 919        return written;
 920}
 921
 922static const struct file_operations proc_zram_block_state_op = {
 923        .open = simple_open,
 924        .read = read_block_state,
 925        .llseek = default_llseek,
 926};
 927
 928static void zram_debugfs_register(struct zram *zram)
 929{
 930        if (!zram_debugfs_root)
 931                return;
 932
 933        zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
 934                                                zram_debugfs_root);
 935        debugfs_create_file("block_state", 0400, zram->debugfs_dir,
 936                                zram, &proc_zram_block_state_op);
 937}
 938
 939static void zram_debugfs_unregister(struct zram *zram)
 940{
 941        debugfs_remove_recursive(zram->debugfs_dir);
 942}
 943#else
 944static void zram_debugfs_create(void) {};
 945static void zram_debugfs_destroy(void) {};
 946static void zram_accessed(struct zram *zram, u32 index)
 947{
 948        zram_clear_flag(zram, index, ZRAM_IDLE);
 949};
 950static void zram_debugfs_register(struct zram *zram) {};
 951static void zram_debugfs_unregister(struct zram *zram) {};
 952#endif
 953
 954/*
 955 * We switched to per-cpu streams and this attr is not needed anymore.
 956 * However, we will keep it around for some time, because:
 957 * a) we may revert per-cpu streams in the future
  958 * b) it's visible to user space and we need to follow our 2-year
  959 *    retirement rule; but we already have a number of 'soon to be
  960 *    altered' attrs, so max_comp_streams needs to wait for the next
 961 *    layoff cycle.
 962 */
 963static ssize_t max_comp_streams_show(struct device *dev,
 964                struct device_attribute *attr, char *buf)
 965{
 966        return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
 967}
 968
 969static ssize_t max_comp_streams_store(struct device *dev,
 970                struct device_attribute *attr, const char *buf, size_t len)
 971{
 972        return len;
 973}
 974
 975static ssize_t comp_algorithm_show(struct device *dev,
 976                struct device_attribute *attr, char *buf)
 977{
 978        size_t sz;
 979        struct zram *zram = dev_to_zram(dev);
 980
 981        down_read(&zram->init_lock);
 982        sz = zcomp_available_show(zram->compressor, buf);
 983        up_read(&zram->init_lock);
 984
 985        return sz;
 986}
 987
 988static ssize_t comp_algorithm_store(struct device *dev,
 989                struct device_attribute *attr, const char *buf, size_t len)
 990{
 991        struct zram *zram = dev_to_zram(dev);
 992        char compressor[ARRAY_SIZE(zram->compressor)];
 993        size_t sz;
 994
 995        strlcpy(compressor, buf, sizeof(compressor));
 996        /* ignore trailing newline */
 997        sz = strlen(compressor);
 998        if (sz > 0 && compressor[sz - 1] == '\n')
 999                compressor[sz - 1] = 0x00;
1000
1001        if (!zcomp_available_algorithm(compressor))
1002                return -EINVAL;
1003
1004        down_write(&zram->init_lock);
1005        if (init_done(zram)) {
1006                up_write(&zram->init_lock);
1007                pr_info("Can't change algorithm for initialized device\n");
1008                return -EBUSY;
1009        }
1010
1011        strcpy(zram->compressor, compressor);
1012        up_write(&zram->init_lock);
1013        return len;
1014}
1015
1016static ssize_t compact_store(struct device *dev,
1017                struct device_attribute *attr, const char *buf, size_t len)
1018{
1019        struct zram *zram = dev_to_zram(dev);
1020
1021        down_read(&zram->init_lock);
1022        if (!init_done(zram)) {
1023                up_read(&zram->init_lock);
1024                return -EINVAL;
1025        }
1026
1027        zs_compact(zram->mem_pool);
1028        up_read(&zram->init_lock);
1029
1030        return len;
1031}
1032
1033static ssize_t io_stat_show(struct device *dev,
1034                struct device_attribute *attr, char *buf)
1035{
1036        struct zram *zram = dev_to_zram(dev);
1037        ssize_t ret;
1038
1039        down_read(&zram->init_lock);
1040        ret = scnprintf(buf, PAGE_SIZE,
1041                        "%8llu %8llu %8llu %8llu\n",
1042                        (u64)atomic64_read(&zram->stats.failed_reads),
1043                        (u64)atomic64_read(&zram->stats.failed_writes),
1044                        (u64)atomic64_read(&zram->stats.invalid_io),
1045                        (u64)atomic64_read(&zram->stats.notify_free));
1046        up_read(&zram->init_lock);
1047
1048        return ret;
1049}
1050
1051static ssize_t mm_stat_show(struct device *dev,
1052                struct device_attribute *attr, char *buf)
1053{
1054        struct zram *zram = dev_to_zram(dev);
1055        struct zs_pool_stats pool_stats;
1056        u64 orig_size, mem_used = 0;
1057        long max_used;
1058        ssize_t ret;
1059
1060        memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1061
1062        down_read(&zram->init_lock);
1063        if (init_done(zram)) {
1064                mem_used = zs_get_total_pages(zram->mem_pool);
1065                zs_pool_stats(zram->mem_pool, &pool_stats);
1066        }
1067
1068        orig_size = atomic64_read(&zram->stats.pages_stored);
1069        max_used = atomic_long_read(&zram->stats.max_used_pages);
1070
1071        ret = scnprintf(buf, PAGE_SIZE,
1072                        "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
1073                        orig_size << PAGE_SHIFT,
1074                        (u64)atomic64_read(&zram->stats.compr_data_size),
1075                        mem_used << PAGE_SHIFT,
1076                        zram->limit_pages << PAGE_SHIFT,
1077                        max_used << PAGE_SHIFT,
1078                        (u64)atomic64_read(&zram->stats.same_pages),
1079                        pool_stats.pages_compacted,
1080                        (u64)atomic64_read(&zram->stats.huge_pages));
1081        up_read(&zram->init_lock);
1082
1083        return ret;
1084}
1085
1086#ifdef CONFIG_ZRAM_WRITEBACK
1087#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1088static ssize_t bd_stat_show(struct device *dev,
1089                struct device_attribute *attr, char *buf)
1090{
1091        struct zram *zram = dev_to_zram(dev);
1092        ssize_t ret;
1093
1094        down_read(&zram->init_lock);
1095        ret = scnprintf(buf, PAGE_SIZE,
1096                "%8llu %8llu %8llu\n",
1097                        FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1098                        FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1099                        FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1100        up_read(&zram->init_lock);
1101
1102        return ret;
1103}
1104#endif
1105
1106static ssize_t debug_stat_show(struct device *dev,
1107                struct device_attribute *attr, char *buf)
1108{
1109        int version = 1;
1110        struct zram *zram = dev_to_zram(dev);
1111        ssize_t ret;
1112
1113        down_read(&zram->init_lock);
1114        ret = scnprintf(buf, PAGE_SIZE,
1115                        "version: %d\n%8llu %8llu\n",
1116                        version,
1117                        (u64)atomic64_read(&zram->stats.writestall),
1118                        (u64)atomic64_read(&zram->stats.miss_free));
1119        up_read(&zram->init_lock);
1120
1121        return ret;
1122}
1123
1124static DEVICE_ATTR_RO(io_stat);
1125static DEVICE_ATTR_RO(mm_stat);
1126#ifdef CONFIG_ZRAM_WRITEBACK
1127static DEVICE_ATTR_RO(bd_stat);
1128#endif
1129static DEVICE_ATTR_RO(debug_stat);
1130
1131static void zram_meta_free(struct zram *zram, u64 disksize)
1132{
1133        size_t num_pages = disksize >> PAGE_SHIFT;
1134        size_t index;
1135
1136        /* Free all pages that are still in this zram device */
1137        for (index = 0; index < num_pages; index++)
1138                zram_free_page(zram, index);
1139
1140        zs_destroy_pool(zram->mem_pool);
1141        vfree(zram->table);
1142}
1143
1144static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1145{
1146        size_t num_pages;
1147
1148        num_pages = disksize >> PAGE_SHIFT;
1149        zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1150        if (!zram->table)
1151                return false;
1152
1153        zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1154        if (!zram->mem_pool) {
1155                vfree(zram->table);
1156                return false;
1157        }
1158
1159        if (!huge_class_size)
1160                huge_class_size = zs_huge_class_size(zram->mem_pool);
1161        return true;
1162}
1163
1164/*
 1165 * To protect concurrent access to the same index entry, the
 1166 * caller should hold this table index entry's bit_spinlock to
 1167 * indicate that this index entry is being accessed.
1168 */
1169static void zram_free_page(struct zram *zram, size_t index)
1170{
1171        unsigned long handle;
1172
1173#ifdef CONFIG_ZRAM_MEMORY_TRACKING
1174        zram->table[index].ac_time = 0;
1175#endif
1176        if (zram_test_flag(zram, index, ZRAM_IDLE))
1177                zram_clear_flag(zram, index, ZRAM_IDLE);
1178
1179        if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1180                zram_clear_flag(zram, index, ZRAM_HUGE);
1181                atomic64_dec(&zram->stats.huge_pages);
1182        }
1183
1184        if (zram_test_flag(zram, index, ZRAM_WB)) {
1185                zram_clear_flag(zram, index, ZRAM_WB);
1186                free_block_bdev(zram, zram_get_element(zram, index));
1187                goto out;
1188        }
1189
1190        /*
 1191         * No memory is allocated for same-element-filled pages.
 1192         * Simply clear the same-page flag.
1193         */
1194        if (zram_test_flag(zram, index, ZRAM_SAME)) {
1195                zram_clear_flag(zram, index, ZRAM_SAME);
1196                atomic64_dec(&zram->stats.same_pages);
1197                goto out;
1198        }
1199
1200        handle = zram_get_handle(zram, index);
1201        if (!handle)
1202                return;
1203
1204        zs_free(zram->mem_pool, handle);
1205
1206        atomic64_sub(zram_get_obj_size(zram, index),
1207                        &zram->stats.compr_data_size);
1208out:
1209        atomic64_dec(&zram->stats.pages_stored);
1210        zram_set_handle(zram, index, 0);
1211        zram_set_obj_size(zram, index, 0);
1212        WARN_ON_ONCE(zram->table[index].flags &
1213                ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1214}
1215
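/*
 * Read path for one slot: writeback slots (ZRAM_WB) are fetched from the
 * backing device, same-filled slots are regenerated by filling the page
 * with the stored value, and everything else is mapped from zsmalloc and
 * either copied as-is (stored uncompressed) or decompressed.
 */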
1216static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1217                                struct bio *bio, bool partial_io)
1218{
1219        int ret;
1220        unsigned long handle;
1221        unsigned int size;
1222        void *src, *dst;
1223
1224        zram_slot_lock(zram, index);
1225        if (zram_test_flag(zram, index, ZRAM_WB)) {
1226                struct bio_vec bvec;
1227
1228                zram_slot_unlock(zram, index);
1229
1230                bvec.bv_page = page;
1231                bvec.bv_len = PAGE_SIZE;
1232                bvec.bv_offset = 0;
1233                return read_from_bdev(zram, &bvec,
1234                                zram_get_element(zram, index),
1235                                bio, partial_io);
1236        }
1237
1238        handle = zram_get_handle(zram, index);
1239        if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1240                unsigned long value;
1241                void *mem;
1242
1243                value = handle ? zram_get_element(zram, index) : 0;
1244                mem = kmap_atomic(page);
1245                zram_fill_page(mem, PAGE_SIZE, value);
1246                kunmap_atomic(mem);
1247                zram_slot_unlock(zram, index);
1248                return 0;
1249        }
1250
1251        size = zram_get_obj_size(zram, index);
1252
1253        src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1254        if (size == PAGE_SIZE) {
1255                dst = kmap_atomic(page);
1256                memcpy(dst, src, PAGE_SIZE);
1257                kunmap_atomic(dst);
1258                ret = 0;
1259        } else {
1260                struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1261
1262                dst = kmap_atomic(page);
1263                ret = zcomp_decompress(zstrm, src, size, dst);
1264                kunmap_atomic(dst);
1265                zcomp_stream_put(zram->comp);
1266        }
1267        zs_unmap_object(zram->mem_pool, handle);
1268        zram_slot_unlock(zram, index);
1269
1270        /* Should NEVER happen. Return bio error if it does. */
1271        if (unlikely(ret))
1272                pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1273
1274        return ret;
1275}
1276
1277static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1278                                u32 index, int offset, struct bio *bio)
1279{
1280        int ret;
1281        struct page *page;
1282
1283        page = bvec->bv_page;
1284        if (is_partial_io(bvec)) {
1285                /* Use a temporary buffer to decompress the page */
1286                page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1287                if (!page)
1288                        return -ENOMEM;
1289        }
1290
1291        ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1292        if (unlikely(ret))
1293                goto out;
1294
1295        if (is_partial_io(bvec)) {
1296                void *dst = kmap_atomic(bvec->bv_page);
1297                void *src = kmap_atomic(page);
1298
1299                memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1300                kunmap_atomic(src);
1301                kunmap_atomic(dst);
1302        }
1303out:
1304        if (is_partial_io(bvec))
1305                __free_page(page);
1306
1307        return ret;
1308}
1309
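/*
 * Write path for one full page: same-filled pages are stored as a single
 * value, everything else is compressed. Objects that do not compress below
 * huge_class_size are stored uncompressed and flagged ZRAM_HUGE. If the
 * fast zs_malloc() fails, the per-cpu stream is dropped, the allocation is
 * retried with GFP_NOIO and the compression is redone (see compress_again).
 */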
1310static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1311                                u32 index, struct bio *bio)
1312{
1313        int ret = 0;
1314        unsigned long alloced_pages;
1315        unsigned long handle = 0;
1316        unsigned int comp_len = 0;
1317        void *src, *dst, *mem;
1318        struct zcomp_strm *zstrm;
1319        struct page *page = bvec->bv_page;
1320        unsigned long element = 0;
1321        enum zram_pageflags flags = 0;
1322
1323        mem = kmap_atomic(page);
1324        if (page_same_filled(mem, &element)) {
1325                kunmap_atomic(mem);
1326                /* Free memory associated with this sector now. */
1327                flags = ZRAM_SAME;
1328                atomic64_inc(&zram->stats.same_pages);
1329                goto out;
1330        }
1331        kunmap_atomic(mem);
1332
1333compress_again:
1334        zstrm = zcomp_stream_get(zram->comp);
1335        src = kmap_atomic(page);
1336        ret = zcomp_compress(zstrm, src, &comp_len);
1337        kunmap_atomic(src);
1338
1339        if (unlikely(ret)) {
1340                zcomp_stream_put(zram->comp);
1341                pr_err("Compression failed! err=%d\n", ret);
1342                zs_free(zram->mem_pool, handle);
1343                return ret;
1344        }
1345
1346        if (comp_len >= huge_class_size)
1347                comp_len = PAGE_SIZE;
1348        /*
1349         * handle allocation has 2 paths:
1350         * a) fast path is executed with preemption disabled (for
1351         *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1352         *  since we can't sleep;
1353         * b) slow path enables preemption and attempts to allocate
1354         *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
1355         *  put per-cpu compression stream and, thus, to re-do
1356         *  the compression once handle is allocated.
1357         *
1358         * if we have a 'non-null' handle here then we are coming
1359         * from the slow path and handle has already been allocated.
1360         */
1361        if (!handle)
1362                handle = zs_malloc(zram->mem_pool, comp_len,
1363                                __GFP_KSWAPD_RECLAIM |
1364                                __GFP_NOWARN |
1365                                __GFP_HIGHMEM |
1366                                __GFP_MOVABLE);
1367        if (!handle) {
1368                zcomp_stream_put(zram->comp);
1369                atomic64_inc(&zram->stats.writestall);
1370                handle = zs_malloc(zram->mem_pool, comp_len,
1371                                GFP_NOIO | __GFP_HIGHMEM |
1372                                __GFP_MOVABLE);
1373                if (handle)
1374                        goto compress_again;
1375                return -ENOMEM;
1376        }
1377
1378        alloced_pages = zs_get_total_pages(zram->mem_pool);
1379        update_used_max(zram, alloced_pages);
1380
1381        if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1382                zcomp_stream_put(zram->comp);
1383                zs_free(zram->mem_pool, handle);
1384                return -ENOMEM;
1385        }
1386
1387        dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1388
1389        src = zstrm->buffer;
1390        if (comp_len == PAGE_SIZE)
1391                src = kmap_atomic(page);
1392        memcpy(dst, src, comp_len);
1393        if (comp_len == PAGE_SIZE)
1394                kunmap_atomic(src);
1395
1396        zcomp_stream_put(zram->comp);
1397        zs_unmap_object(zram->mem_pool, handle);
1398        atomic64_add(comp_len, &zram->stats.compr_data_size);
1399out:
1400        /*
1401         * Free memory associated with this sector
1402         * before overwriting unused sectors.
1403         */
1404        zram_slot_lock(zram, index);
1405        zram_free_page(zram, index);
1406
1407        if (comp_len == PAGE_SIZE) {
1408                zram_set_flag(zram, index, ZRAM_HUGE);
1409                atomic64_inc(&zram->stats.huge_pages);
1410        }
1411
1412        if (flags) {
1413                zram_set_flag(zram, index, flags);
1414                zram_set_element(zram, index, element);
1415        }  else {
1416                zram_set_handle(zram, index, handle);
1417                zram_set_obj_size(zram, index, comp_len);
1418        }
1419        zram_slot_unlock(zram, index);
1420
1421        /* Update stats */
1422        atomic64_inc(&zram->stats.pages_stored);
1423        return ret;
1424}
1425
1426static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1427                                u32 index, int offset, struct bio *bio)
1428{
1429        int ret;
1430        struct page *page = NULL;
1431        void *src;
1432        struct bio_vec vec;
1433
1434        vec = *bvec;
1435        if (is_partial_io(bvec)) {
1436                void *dst;
1437                /*
1438                 * This is a partial IO. We need to read the full page
1439                 * before to write the changes.
 1440                 * before writing the changes.
1441                page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1442                if (!page)
1443                        return -ENOMEM;
1444
1445                ret = __zram_bvec_read(zram, page, index, bio, true);
1446                if (ret)
1447                        goto out;
1448
1449                src = kmap_atomic(bvec->bv_page);
1450                dst = kmap_atomic(page);
1451                memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1452                kunmap_atomic(dst);
1453                kunmap_atomic(src);
1454
1455                vec.bv_page = page;
1456                vec.bv_len = PAGE_SIZE;
1457                vec.bv_offset = 0;
1458        }
1459
1460        ret = __zram_bvec_write(zram, &vec, index, bio);
1461out:
1462        if (is_partial_io(bvec))
1463                __free_page(page);
1464        return ret;
1465}
1466
1467/*
 1468 * zram_bio_discard - handler for discard requests
 1469 * @index: physical block index in PAGE_SIZE units
 1470 * @offset: byte offset within the physical block
1471 */
1472static void zram_bio_discard(struct zram *zram, u32 index,
1473                             int offset, struct bio *bio)
1474{
1475        size_t n = bio->bi_iter.bi_size;
1476
1477        /*
 1478         * zram manages data in physical block size units. Because the logical
 1479         * block size isn't identical to the physical block size on some
 1480         * architectures, we could get a discard request pointing to a specific
 1481         * offset within a certain physical block.  Although we could handle
 1482         * such a request by reading that physical block, decompressing,
 1483         * partially zeroing, re-compressing and then re-storing it, this isn't
 1484         * reasonable because our intent with a discard request is to save
 1485         * memory.  So skipping this logical block is appropriate here.
1486         */
1487        if (offset) {
1488                if (n <= (PAGE_SIZE - offset))
1489                        return;
1490
1491                n -= (PAGE_SIZE - offset);
1492                index++;
1493        }
1494
1495        while (n >= PAGE_SIZE) {
1496                zram_slot_lock(zram, index);
1497                zram_free_page(zram, index);
1498                zram_slot_unlock(zram, index);
1499                atomic64_inc(&zram->stats.notify_free);
1500                index++;
1501                n -= PAGE_SIZE;
1502        }
1503}
1504
1505/*
 1506 * Returns an errno if there is a problem. Otherwise returns 0 or 1:
 1507 * 0 if the IO request was completed synchronously,
 1508 * 1 if the IO request was successfully submitted (asynchronously).
1509 */
1510static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1511                        int offset, unsigned int op, struct bio *bio)
1512{
1513        int ret;
1514
1515        if (!op_is_write(op)) {
1516                atomic64_inc(&zram->stats.num_reads);
1517                ret = zram_bvec_read(zram, bvec, index, offset, bio);
1518                flush_dcache_page(bvec->bv_page);
1519        } else {
1520                atomic64_inc(&zram->stats.num_writes);
1521                ret = zram_bvec_write(zram, bvec, index, offset, bio);
1522        }
1523
1524        zram_slot_lock(zram, index);
1525        zram_accessed(zram, index);
1526        zram_slot_unlock(zram, index);
1527
1528        if (unlikely(ret < 0)) {
1529                if (!op_is_write(op))
1530                        atomic64_inc(&zram->stats.failed_reads);
1531                else
1532                        atomic64_inc(&zram->stats.failed_writes);
1533        }
1534
1535        return ret;
1536}
1537
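/*
 * Split the bio into chunks that never cross a PAGE_SIZE boundary and feed
 * them to zram_bvec_rw() one by one; discard/write-zeroes requests are
 * handled separately by simply freeing the covered slots.
 */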
1538static void __zram_make_request(struct zram *zram, struct bio *bio)
1539{
1540        int offset;
1541        u32 index;
1542        struct bio_vec bvec;
1543        struct bvec_iter iter;
1544        unsigned long start_time;
1545
1546        index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1547        offset = (bio->bi_iter.bi_sector &
1548                  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1549
1550        switch (bio_op(bio)) {
1551        case REQ_OP_DISCARD:
1552        case REQ_OP_WRITE_ZEROES:
1553                zram_bio_discard(zram, index, offset, bio);
1554                bio_endio(bio);
1555                return;
1556        default:
1557                break;
1558        }
1559
1560        start_time = bio_start_io_acct(bio);
1561        bio_for_each_segment(bvec, bio, iter) {
1562                struct bio_vec bv = bvec;
1563                unsigned int unwritten = bvec.bv_len;
1564
1565                do {
1566                        bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1567                                                        unwritten);
1568                        if (zram_bvec_rw(zram, &bv, index, offset,
1569                                         bio_op(bio), bio) < 0) {
1570                                bio->bi_status = BLK_STS_IOERR;
1571                                break;
1572                        }
1573
1574                        bv.bv_offset += bv.bv_len;
1575                        unwritten -= bv.bv_len;
1576
1577                        update_position(&index, &offset, &bv);
1578                } while (unwritten);
1579        }
1580        bio_end_io_acct(bio, start_time);
1581        bio_endio(bio);
1582}
1583
1584/*
1585 * Handler function for all zram I/O requests.
1586 */
1587static blk_qc_t zram_submit_bio(struct bio *bio)
1588{
1589        struct zram *zram = bio->bi_disk->private_data;
1590
1591        if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1592                                        bio->bi_iter.bi_size)) {
1593                atomic64_inc(&zram->stats.invalid_io);
1594                goto error;
1595        }
1596
1597        __zram_make_request(zram, bio);
1598        return BLK_QC_T_NONE;
1599
1600error:
1601        bio_io_error(bio);
1602        return BLK_QC_T_NONE;
1603}
1604
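/*
 * Called when the upper layer (e.g. swap) frees a slot. Uses a trylock so
 * this notification path never blocks; if the slot lock is contended the
 * free is skipped and accounted in the miss_free statistic instead.
 */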
1605static void zram_slot_free_notify(struct block_device *bdev,
1606                                unsigned long index)
1607{
1608        struct zram *zram;
1609
1610        zram = bdev->bd_disk->private_data;
1611
1612        atomic64_inc(&zram->stats.notify_free);
1613        if (!zram_slot_trylock(zram, index)) {
1614                atomic64_inc(&zram->stats.miss_free);
1615                return;
1616        }
1617
1618        zram_free_page(zram, index);
1619        zram_slot_unlock(zram, index);
1620}
1621
1622static int zram_rw_page(struct block_device *bdev, sector_t sector,
1623                       struct page *page, unsigned int op)
1624{
1625        int offset, ret;
1626        u32 index;
1627        struct zram *zram;
1628        struct bio_vec bv;
1629        unsigned long start_time;
1630
1631        if (PageTransHuge(page))
1632                return -ENOTSUPP;
1633        zram = bdev->bd_disk->private_data;
1634
1635        if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1636                atomic64_inc(&zram->stats.invalid_io);
1637                ret = -EINVAL;
1638                goto out;
1639        }
1640
1641        index = sector >> SECTORS_PER_PAGE_SHIFT;
1642        offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1643
1644        bv.bv_page = page;
1645        bv.bv_len = PAGE_SIZE;
1646        bv.bv_offset = 0;
1647
1648        start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
1649        ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1650        disk_end_io_acct(bdev->bd_disk, op, start_time);
1651out:
1652        /*
1653         * If the I/O fails, just return the error (i.e. non-zero) without
1654         * calling page_endio.
1655         * The callers of rw_page (e.g. swap_readpage, __swap_writepage)
1656         * will then resubmit the I/O as a bio request, and
1657         * bio->bi_end_io handles the error (e.g. SetPageError,
1658         * set_page_dirty and other cleanup).
1659         */
1660        if (unlikely(ret < 0))
1661                return ret;
1662
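            /*
             * A return value of 0 means the page was served synchronously
             * and is completed here; 1 indicates the request was handed off
             * as an asynchronous bio (e.g. a read of a page that was written
             * back to the backing device), whose completion path calls
             * page_endio itself, so only success is reported to the caller.
             */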
1663        switch (ret) {
1664        case 0:
1665                page_endio(page, op_is_write(op), 0);
1666                break;
1667        case 1:
1668                ret = 0;
1669                break;
1670        default:
1671                WARN_ON(1);
1672        }
1673        return ret;
1674}
1675
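    /*
     * Tear the device down to its pre-disksize_store() state: clear the size
     * and capacity under init_lock so the device rejects further I/O, then
     * free the metadata, zero the statistics, destroy the compression backend
     * and detach any writeback backing device.
     */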
1676static void zram_reset_device(struct zram *zram)
1677{
1678        struct zcomp *comp;
1679        u64 disksize;
1680
1681        down_write(&zram->init_lock);
1682
1683        zram->limit_pages = 0;
1684
1685        if (!init_done(zram)) {
1686                up_write(&zram->init_lock);
1687                return;
1688        }
1689
1690        comp = zram->comp;
1691        disksize = zram->disksize;
1692        zram->disksize = 0;
1693
1694        set_capacity(zram->disk, 0);
1695        part_stat_set_all(&zram->disk->part0, 0);
1696
1697        up_write(&zram->init_lock);
1698        /* All in-flight I/O on every CPU is done, so it's safe to free */
1699        zram_meta_free(zram, disksize);
1700        memset(&zram->stats, 0, sizeof(zram->stats));
1701        zcomp_destroy(comp);
1702        reset_bdev(zram);
1703}
1704
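    /*
     * Writing a size to /sys/block/zram<id>/disksize initializes the device;
     * memparse() accepts suffixes such as K, M and G, and the value is
     * rounded up to a multiple of PAGE_SIZE, e.g.:
     *   echo 1G > /sys/block/zram0/disksize
     */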
1705static ssize_t disksize_store(struct device *dev,
1706                struct device_attribute *attr, const char *buf, size_t len)
1707{
1708        u64 disksize;
1709        struct zcomp *comp;
1710        struct zram *zram = dev_to_zram(dev);
1711        int err;
1712
1713        disksize = memparse(buf, NULL);
1714        if (!disksize)
1715                return -EINVAL;
1716
1717        down_write(&zram->init_lock);
1718        if (init_done(zram)) {
1719                pr_info("Cannot change disksize for initialized device\n");
1720                err = -EBUSY;
1721                goto out_unlock;
1722        }
1723
1724        disksize = PAGE_ALIGN(disksize);
1725        if (!zram_meta_alloc(zram, disksize)) {
1726                err = -ENOMEM;
1727                goto out_unlock;
1728        }
1729
1730        comp = zcomp_create(zram->compressor);
1731        if (IS_ERR(comp)) {
1732                pr_err("Cannot initialise %s compressing backend\n",
1733                                zram->compressor);
1734                err = PTR_ERR(comp);
1735                goto out_free_meta;
1736        }
1737
1738        zram->comp = comp;
1739        zram->disksize = disksize;
1740        set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1741
1742        revalidate_disk(zram->disk);
1743        up_write(&zram->init_lock);
1744
1745        return len;
1746
1747out_free_meta:
1748        zram_meta_free(zram, disksize);
1749out_unlock:
1750        up_write(&zram->init_lock);
1751        return err;
1752}
1753
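    /*
     * Writing a non-zero value to /sys/block/zram<id>/reset, e.g.
     *   echo 1 > /sys/block/zram0/reset
     * wipes the device back to an uninitialized state. It fails with -EBUSY
     * while the device is still open (e.g. mounted or used as swap).
     */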
1754static ssize_t reset_store(struct device *dev,
1755                struct device_attribute *attr, const char *buf, size_t len)
1756{
1757        int ret;
1758        unsigned short do_reset;
1759        struct zram *zram;
1760        struct block_device *bdev;
1761
1762        ret = kstrtou16(buf, 10, &do_reset);
1763        if (ret)
1764                return ret;
1765
1766        if (!do_reset)
1767                return -EINVAL;
1768
1769        zram = dev_to_zram(dev);
1770        bdev = bdget_disk(zram->disk, 0);
1771        if (!bdev)
1772                return -ENOMEM;
1773
1774        mutex_lock(&bdev->bd_mutex);
1775        /* Do not reset an active device or claimed device */
1776        if (bdev->bd_openers || zram->claim) {
1777                mutex_unlock(&bdev->bd_mutex);
1778                bdput(bdev);
1779                return -EBUSY;
1780        }
1781
1782        /* From now on, nobody can open /dev/zram[0-9] */
1783        zram->claim = true;
1784        mutex_unlock(&bdev->bd_mutex);
1785
1786        /* Make sure all pending I/O has finished */
1787        fsync_bdev(bdev);
1788        zram_reset_device(zram);
1789        revalidate_disk(zram->disk);
1790        bdput(bdev);
1791
1792        mutex_lock(&bdev->bd_mutex);
1793        zram->claim = false;
1794        mutex_unlock(&bdev->bd_mutex);
1795
1796        return len;
1797}
1798
1799static int zram_open(struct block_device *bdev, fmode_t mode)
1800{
1801        int ret = 0;
1802        struct zram *zram;
1803
1804        WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1805
1806        zram = bdev->bd_disk->private_data;
1807        /* zram was claimed for a reset, so the open request fails */
1808        if (zram->claim)
1809                ret = -EBUSY;
1810
1811        return ret;
1812}
1813
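    /*
     * Block layer hooks: zram is bio-based (.submit_bio, no request queue
     * scheduling), provides the synchronous .rw_page path used by the
     * swap code (see the comment in zram_rw_page()), and lets the swap
     * layer free slots eagerly via .swap_slot_free_notify.
     */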
1814static const struct block_device_operations zram_devops = {
1815        .open = zram_open,
1816        .submit_bio = zram_submit_bio,
1817        .swap_slot_free_notify = zram_slot_free_notify,
1818        .rw_page = zram_rw_page,
1819        .owner = THIS_MODULE
1820};
1821
1822static DEVICE_ATTR_WO(compact);
1823static DEVICE_ATTR_RW(disksize);
1824static DEVICE_ATTR_RO(initstate);
1825static DEVICE_ATTR_WO(reset);
1826static DEVICE_ATTR_WO(mem_limit);
1827static DEVICE_ATTR_WO(mem_used_max);
1828static DEVICE_ATTR_WO(idle);
1829static DEVICE_ATTR_RW(max_comp_streams);
1830static DEVICE_ATTR_RW(comp_algorithm);
1831#ifdef CONFIG_ZRAM_WRITEBACK
1832static DEVICE_ATTR_RW(backing_dev);
1833static DEVICE_ATTR_WO(writeback);
1834static DEVICE_ATTR_RW(writeback_limit);
1835static DEVICE_ATTR_RW(writeback_limit_enable);
1836#endif
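    /*
     * The DEVICE_ATTR_* declarations above bind the *_show()/*_store()
     * handlers defined earlier to per-device sysfs files under
     * /sys/block/zram<id>/.
     */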
1837
1838static struct attribute *zram_disk_attrs[] = {
1839        &dev_attr_disksize.attr,
1840        &dev_attr_initstate.attr,
1841        &dev_attr_reset.attr,
1842        &dev_attr_compact.attr,
1843        &dev_attr_mem_limit.attr,
1844        &dev_attr_mem_used_max.attr,
1845        &dev_attr_idle.attr,
1846        &dev_attr_max_comp_streams.attr,
1847        &dev_attr_comp_algorithm.attr,
1848#ifdef CONFIG_ZRAM_WRITEBACK
1849        &dev_attr_backing_dev.attr,
1850        &dev_attr_writeback.attr,
1851        &dev_attr_writeback_limit.attr,
1852        &dev_attr_writeback_limit_enable.attr,
1853#endif
1854        &dev_attr_io_stat.attr,
1855        &dev_attr_mm_stat.attr,
1856#ifdef CONFIG_ZRAM_WRITEBACK
1857        &dev_attr_bd_stat.attr,
1858#endif
1859        &dev_attr_debug_stat.attr,
1860        NULL,
1861};
1862
1863static const struct attribute_group zram_disk_attr_group = {
1864        .attrs = zram_disk_attrs,
1865};
1866
1867static const struct attribute_group *zram_disk_attr_groups[] = {
1868        &zram_disk_attr_group,
1869        NULL,
1870};
1871
1872/*
1873 * Allocate and initialize a new zram device. The function returns
1874 * a device_id ('>= 0') upon success, and a negative value otherwise.
1875 */
1876static int zram_add(void)
1877{
1878        struct zram *zram;
1879        struct request_queue *queue;
1880        int ret, device_id;
1881
1882        zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1883        if (!zram)
1884                return -ENOMEM;
1885
1886        ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1887        if (ret < 0)
1888                goto out_free_dev;
1889        device_id = ret;
1890
1891        init_rwsem(&zram->init_lock);
1892#ifdef CONFIG_ZRAM_WRITEBACK
1893        spin_lock_init(&zram->wb_limit_lock);
1894#endif
1895        queue = blk_alloc_queue(NUMA_NO_NODE);
1896        if (!queue) {
1897                pr_err("Error allocating disk queue for device %d\n",
1898                        device_id);
1899                ret = -ENOMEM;
1900                goto out_free_idr;
1901        }
1902
1903        /* gendisk structure */
1904        zram->disk = alloc_disk(1);
1905        if (!zram->disk) {
1906                pr_err("Error allocating disk structure for device %d\n",
1907                        device_id);
1908                ret = -ENOMEM;
1909                goto out_free_queue;
1910        }
1911
1912        zram->disk->major = zram_major;
1913        zram->disk->first_minor = device_id;
1914        zram->disk->fops = &zram_devops;
1915        zram->disk->queue = queue;
1916        zram->disk->private_data = zram;
1917        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1918
1919        /* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
1920        set_capacity(zram->disk, 0);
1921        /* zram devices sort of resemble non-rotational disks */
1922        blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1923        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1924
1925        /*
1926         * To ensure that we always get PAGE_SIZE-aligned
1927         * and n*PAGE_SIZE-sized I/O requests.
1928         */
1929        blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1930        blk_queue_logical_block_size(zram->disk->queue,
1931                                        ZRAM_LOGICAL_BLOCK_SIZE);
1932        blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1933        blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1934        zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1935        blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1936        blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1937
1938        /*
1939         * zram_bio_discard() clears all logical blocks if the logical block
1940         * size is identical to the physical block size (PAGE_SIZE). But if
1941         * they differ, we skip discarding the parts of logical blocks that
1942         * fall into the portion of the request range which isn't aligned to
1943         * the physical block size, so we can't guarantee that all discarded
1944         * logical blocks are zeroed.
1945         */
1946        if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1947                blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1948
1949        zram->disk->queue->backing_dev_info->capabilities |=
1950                        (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1951        device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
1952
1953        strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1954
1955        zram_debugfs_register(zram);
1956        pr_info("Added device: %s\n", zram->disk->disk_name);
1957        return device_id;
1958
1959out_free_queue:
1960        blk_cleanup_queue(queue);
1961out_free_idr:
1962        idr_remove(&zram_index_idr, device_id);
1963out_free_dev:
1964        kfree(zram);
1965        return ret;
1966}
1967
1968static int zram_remove(struct zram *zram)
1969{
1970        struct block_device *bdev;
1971
1972        bdev = bdget_disk(zram->disk, 0);
1973        if (!bdev)
1974                return -ENOMEM;
1975
1976        mutex_lock(&bdev->bd_mutex);
1977        if (bdev->bd_openers || zram->claim) {
1978                mutex_unlock(&bdev->bd_mutex);
1979                bdput(bdev);
1980                return -EBUSY;
1981        }
1982
1983        zram->claim = true;
1984        mutex_unlock(&bdev->bd_mutex);
1985
1986        zram_debugfs_unregister(zram);
1987
1988        /* Make sure all pending I/O has finished */
1989        fsync_bdev(bdev);
1990        zram_reset_device(zram);
1991        bdput(bdev);
1992
1993        pr_info("Removed device: %s\n", zram->disk->disk_name);
1994
1995        del_gendisk(zram->disk);
1996        blk_cleanup_queue(zram->disk->queue);
1997        put_disk(zram->disk);
1998        kfree(zram);
1999        return 0;
2000}
2001
2002/* zram-control sysfs attributes */
2003
2004/*
2005 * NOTE: hot_add is not a usual read-only sysfs attribute, in the sense
2006 * that reading from this file does alter the state of your system -- it
2007 * creates a new, uninitialized zram device and returns that device's
2008 * device_id (or an error code if it fails to create a new device).
2009 */
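    /* e.g. "cat /sys/class/zram-control/hot_add" prints the new device id */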
2010static ssize_t hot_add_show(struct class *class,
2011                        struct class_attribute *attr,
2012                        char *buf)
2013{
2014        int ret;
2015
2016        mutex_lock(&zram_index_mutex);
2017        ret = zram_add();
2018        mutex_unlock(&zram_index_mutex);
2019
2020        if (ret < 0)
2021                return ret;
2022        return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2023}
2024static struct class_attribute class_attr_hot_add =
2025        __ATTR(hot_add, 0400, hot_add_show, NULL);
2026
2027static ssize_t hot_remove_store(struct class *class,
2028                        struct class_attribute *attr,
2029                        const char *buf,
2030                        size_t count)
2031{
2032        struct zram *zram;
2033        int ret, dev_id;
2034
2035        /* dev_id is gendisk->first_minor, which is `int' */
2036        ret = kstrtoint(buf, 10, &dev_id);
2037        if (ret)
2038                return ret;
2039        if (dev_id < 0)
2040                return -EINVAL;
2041
2042        mutex_lock(&zram_index_mutex);
2043
2044        zram = idr_find(&zram_index_idr, dev_id);
2045        if (zram) {
2046                ret = zram_remove(zram);
2047                if (!ret)
2048                        idr_remove(&zram_index_idr, dev_id);
2049        } else {
2050                ret = -ENODEV;
2051        }
2052
2053        mutex_unlock(&zram_index_mutex);
2054        return ret ? ret : count;
2055}
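    /*
     * e.g. "echo 4 > /sys/class/zram-control/hot_remove" removes zram4,
     * provided that device is not currently open.
     */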
2056static CLASS_ATTR_WO(hot_remove);
2057
2058static struct attribute *zram_control_class_attrs[] = {
2059        &class_attr_hot_add.attr,
2060        &class_attr_hot_remove.attr,
2061        NULL,
2062};
2063ATTRIBUTE_GROUPS(zram_control_class);
2064
2065static struct class zram_control_class = {
2066        .name           = "zram-control",
2067        .owner          = THIS_MODULE,
2068        .class_groups   = zram_control_class_groups,
2069};
2070
2071static int zram_remove_cb(int id, void *ptr, void *data)
2072{
2073        zram_remove(ptr);
2074        return 0;
2075}
2076
2077static void destroy_devices(void)
2078{
2079        class_unregister(&zram_control_class);
2080        idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2081        zram_debugfs_destroy();
2082        idr_destroy(&zram_index_idr);
2083        unregister_blkdev(zram_major, "zram");
2084        cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2085}
2086
2087static int __init zram_init(void)
2088{
2089        int ret;
2090
2091        ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2092                                      zcomp_cpu_up_prepare, zcomp_cpu_dead);
2093        if (ret < 0)
2094                return ret;
2095
2096        ret = class_register(&zram_control_class);
2097        if (ret) {
2098                pr_err("Unable to register zram-control class\n");
2099                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2100                return ret;
2101        }
2102
2103        zram_debugfs_create();
2104        zram_major = register_blkdev(0, "zram");
2105        if (zram_major <= 0) {
2106                pr_err("Unable to get major number\n");
2107                class_unregister(&zram_control_class);
2108                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2109                return -EBUSY;
2110        }
2111
2112        while (num_devices != 0) {
2113                mutex_lock(&zram_index_mutex);
2114                ret = zram_add();
2115                mutex_unlock(&zram_index_mutex);
2116                if (ret < 0)
2117                        goto out_error;
2118                num_devices--;
2119        }
2120
2121        return 0;
2122
2123out_error:
2124        destroy_devices();
2125        return ret;
2126}
2127
2128static void __exit zram_exit(void)
2129{
2130        destroy_devices();
2131}
2132
2133module_init(zram_init);
2134module_exit(zram_exit);
2135
2136module_param(num_devices, uint, 0);
2137MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
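    /*
     * e.g. "modprobe zram num_devices=4" pre-creates zram0..zram3; more
     * devices can still be added at runtime via the hot_add attribute.
     */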
2138
2139MODULE_LICENSE("Dual BSD/GPL");
2140MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2141MODULE_DESCRIPTION("Compressed RAM Block Device");
2142