linux/drivers/block/zram/zram_drv.c
   1/*
   2 * Compressed RAM block device
   3 *
   4 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
   5 *               2012, 2013 Minchan Kim
   6 *
   7 * This code is released using a dual license strategy: BSD/GPL
   8 * You can choose the licence that better fits your requirements.
   9 *
  10 * Released under the terms of 3-clause BSD License
  11 * Released under the terms of GNU General Public License Version 2.0
  12 *
  13 */
  14
  15#define KMSG_COMPONENT "zram"
  16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  17
  18#include <linux/module.h>
  19#include <linux/kernel.h>
  20#include <linux/bio.h>
  21#include <linux/bitops.h>
  22#include <linux/blkdev.h>
  23#include <linux/buffer_head.h>
  24#include <linux/device.h>
  25#include <linux/genhd.h>
  26#include <linux/highmem.h>
  27#include <linux/slab.h>
  28#include <linux/backing-dev.h>
  29#include <linux/string.h>
  30#include <linux/vmalloc.h>
  31#include <linux/err.h>
  32#include <linux/idr.h>
  33#include <linux/sysfs.h>
  34#include <linux/debugfs.h>
  35#include <linux/cpuhotplug.h>
  36
  37#include "zram_drv.h"
  38
  39static DEFINE_IDR(zram_index_idr);
   40/* idr index must be protected by zram_index_mutex */
  41static DEFINE_MUTEX(zram_index_mutex);
  42
  43static int zram_major;
  44static const char *default_compressor = "lzo-rle";
  45
  46/* Module params (documentation at end) */
  47static unsigned int num_devices = 1;
  48/*
   49 * Pages that compress to a size equal to or greater than this are stored
  50 * uncompressed in memory.
  51 */
  52static size_t huge_class_size;
  53
  54static void zram_free_page(struct zram *zram, size_t index);
  55static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  56                                u32 index, int offset, struct bio *bio);
  57
  58
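/*
 * Per-slot locking: ZRAM_LOCK is a bit spinlock embedded in each table
 * entry's flags word; the helpers below acquire and release it.
 */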
  59static int zram_slot_trylock(struct zram *zram, u32 index)
  60{
  61        return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
  62}
  63
  64static void zram_slot_lock(struct zram *zram, u32 index)
  65{
  66        bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
  67}
  68
  69static void zram_slot_unlock(struct zram *zram, u32 index)
  70{
  71        bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
  72}
  73
  74static inline bool init_done(struct zram *zram)
  75{
  76        return zram->disksize;
  77}
  78
  79static inline struct zram *dev_to_zram(struct device *dev)
  80{
  81        return (struct zram *)dev_to_disk(dev)->private_data;
  82}
  83
  84static unsigned long zram_get_handle(struct zram *zram, u32 index)
  85{
  86        return zram->table[index].handle;
  87}
  88
  89static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
  90{
  91        zram->table[index].handle = handle;
  92}
  93
   94/* flag operations require the table entry's bit_spin_lock() to be held */
  95static bool zram_test_flag(struct zram *zram, u32 index,
  96                        enum zram_pageflags flag)
  97{
  98        return zram->table[index].flags & BIT(flag);
  99}
 100
 101static void zram_set_flag(struct zram *zram, u32 index,
 102                        enum zram_pageflags flag)
 103{
 104        zram->table[index].flags |= BIT(flag);
 105}
 106
 107static void zram_clear_flag(struct zram *zram, u32 index,
 108                        enum zram_pageflags flag)
 109{
 110        zram->table[index].flags &= ~BIT(flag);
 111}
 112
 113static inline void zram_set_element(struct zram *zram, u32 index,
 114                        unsigned long element)
 115{
 116        zram->table[index].element = element;
 117}
 118
 119static unsigned long zram_get_element(struct zram *zram, u32 index)
 120{
 121        return zram->table[index].element;
 122}
 123
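/*
 * The low ZRAM_FLAG_SHIFT bits of the flags word store the compressed
 * object size; the zram_pageflags (including ZRAM_LOCK) live above them.
 */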
 124static size_t zram_get_obj_size(struct zram *zram, u32 index)
 125{
 126        return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
 127}
 128
 129static void zram_set_obj_size(struct zram *zram,
 130                                        u32 index, size_t size)
 131{
 132        unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
 133
 134        zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
 135}
 136
 137static inline bool zram_allocated(struct zram *zram, u32 index)
 138{
 139        return zram_get_obj_size(zram, index) ||
 140                        zram_test_flag(zram, index, ZRAM_SAME) ||
 141                        zram_test_flag(zram, index, ZRAM_WB);
 142}
 143
 144#if PAGE_SIZE != 4096
 145static inline bool is_partial_io(struct bio_vec *bvec)
 146{
 147        return bvec->bv_len != PAGE_SIZE;
 148}
 149#else
 150static inline bool is_partial_io(struct bio_vec *bvec)
 151{
 152        return false;
 153}
 154#endif
 155
 156/*
 157 * Check if request is within bounds and aligned on zram logical blocks.
 158 */
 159static inline bool valid_io_request(struct zram *zram,
 160                sector_t start, unsigned int size)
 161{
 162        u64 end, bound;
 163
 164        /* unaligned request */
 165        if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
 166                return false;
 167        if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
 168                return false;
 169
 170        end = start + (size >> SECTOR_SHIFT);
 171        bound = zram->disksize >> SECTOR_SHIFT;
  172        /* out of range */
 173        if (unlikely(start >= bound || end > bound || start > end))
 174                return false;
 175
 176        /* I/O request is valid */
 177        return true;
 178}
 179
 180static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
 181{
 182        *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
 183        *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
 184}
 185
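/*
 * Lock-free update of the max_used_pages watermark: retry the cmpxchg
 * until either the stored maximum is already >= pages or our value wins.
 */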
 186static inline void update_used_max(struct zram *zram,
 187                                        const unsigned long pages)
 188{
 189        unsigned long old_max, cur_max;
 190
 191        old_max = atomic_long_read(&zram->stats.max_used_pages);
 192
 193        do {
 194                cur_max = old_max;
 195                if (pages > cur_max)
 196                        old_max = atomic_long_cmpxchg(
 197                                &zram->stats.max_used_pages, cur_max, pages);
 198        } while (old_max != cur_max);
 199}
 200
 201static inline void zram_fill_page(void *ptr, unsigned long len,
 202                                        unsigned long value)
 203{
 204        WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
 205        memset_l(ptr, value, len / sizeof(unsigned long));
 206}
 207
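/*
 * A page is "same filled" when every machine word equals the first one;
 * such pages are not compressed, only the fill value is recorded.
 */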
 208static bool page_same_filled(void *ptr, unsigned long *element)
 209{
 210        unsigned int pos;
 211        unsigned long *page;
 212        unsigned long val;
 213
 214        page = (unsigned long *)ptr;
 215        val = page[0];
 216
 217        for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
 218                if (val != page[pos])
 219                        return false;
 220        }
 221
 222        *element = val;
 223
 224        return true;
 225}
 226
 227static ssize_t initstate_show(struct device *dev,
 228                struct device_attribute *attr, char *buf)
 229{
 230        u32 val;
 231        struct zram *zram = dev_to_zram(dev);
 232
 233        down_read(&zram->init_lock);
 234        val = init_done(zram);
 235        up_read(&zram->init_lock);
 236
 237        return scnprintf(buf, PAGE_SIZE, "%u\n", val);
 238}
 239
 240static ssize_t disksize_show(struct device *dev,
 241                struct device_attribute *attr, char *buf)
 242{
 243        struct zram *zram = dev_to_zram(dev);
 244
 245        return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
 246}
 247
 248static ssize_t mem_limit_store(struct device *dev,
 249                struct device_attribute *attr, const char *buf, size_t len)
 250{
 251        u64 limit;
 252        char *tmp;
 253        struct zram *zram = dev_to_zram(dev);
 254
 255        limit = memparse(buf, &tmp);
 256        if (buf == tmp) /* no chars parsed, invalid input */
 257                return -EINVAL;
 258
 259        down_write(&zram->init_lock);
 260        zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
 261        up_write(&zram->init_lock);
 262
 263        return len;
 264}
 265
 266static ssize_t mem_used_max_store(struct device *dev,
 267                struct device_attribute *attr, const char *buf, size_t len)
 268{
 269        int err;
 270        unsigned long val;
 271        struct zram *zram = dev_to_zram(dev);
 272
 273        err = kstrtoul(buf, 10, &val);
 274        if (err || val != 0)
 275                return -EINVAL;
 276
 277        down_read(&zram->init_lock);
 278        if (init_done(zram)) {
 279                atomic_long_set(&zram->stats.max_used_pages,
 280                                zs_get_total_pages(zram->mem_pool));
 281        }
 282        up_read(&zram->init_lock);
 283
 284        return len;
 285}
 286
 287static ssize_t idle_store(struct device *dev,
 288                struct device_attribute *attr, const char *buf, size_t len)
 289{
 290        struct zram *zram = dev_to_zram(dev);
 291        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 292        int index;
 293
 294        if (!sysfs_streq(buf, "all"))
 295                return -EINVAL;
 296
 297        down_read(&zram->init_lock);
 298        if (!init_done(zram)) {
 299                up_read(&zram->init_lock);
 300                return -EINVAL;
 301        }
 302
 303        for (index = 0; index < nr_pages; index++) {
 304                /*
  305                 * Do not mark a ZRAM_UNDER_WB slot as ZRAM_IDLE, to close a race.
 306                 * See the comment in writeback_store.
 307                 */
 308                zram_slot_lock(zram, index);
 309                if (zram_allocated(zram, index) &&
 310                                !zram_test_flag(zram, index, ZRAM_UNDER_WB))
 311                        zram_set_flag(zram, index, ZRAM_IDLE);
 312                zram_slot_unlock(zram, index);
 313        }
 314
 315        up_read(&zram->init_lock);
 316
 317        return len;
 318}
 319
 320#ifdef CONFIG_ZRAM_WRITEBACK
 321static ssize_t writeback_limit_enable_store(struct device *dev,
 322                struct device_attribute *attr, const char *buf, size_t len)
 323{
 324        struct zram *zram = dev_to_zram(dev);
 325        u64 val;
 326        ssize_t ret = -EINVAL;
 327
 328        if (kstrtoull(buf, 10, &val))
 329                return ret;
 330
 331        down_read(&zram->init_lock);
 332        spin_lock(&zram->wb_limit_lock);
 333        zram->wb_limit_enable = val;
 334        spin_unlock(&zram->wb_limit_lock);
 335        up_read(&zram->init_lock);
 336        ret = len;
 337
 338        return ret;
 339}
 340
 341static ssize_t writeback_limit_enable_show(struct device *dev,
 342                struct device_attribute *attr, char *buf)
 343{
 344        bool val;
 345        struct zram *zram = dev_to_zram(dev);
 346
 347        down_read(&zram->init_lock);
 348        spin_lock(&zram->wb_limit_lock);
 349        val = zram->wb_limit_enable;
 350        spin_unlock(&zram->wb_limit_lock);
 351        up_read(&zram->init_lock);
 352
 353        return scnprintf(buf, PAGE_SIZE, "%d\n", val);
 354}
 355
 356static ssize_t writeback_limit_store(struct device *dev,
 357                struct device_attribute *attr, const char *buf, size_t len)
 358{
 359        struct zram *zram = dev_to_zram(dev);
 360        u64 val;
 361        ssize_t ret = -EINVAL;
 362
 363        if (kstrtoull(buf, 10, &val))
 364                return ret;
 365
 366        down_read(&zram->init_lock);
 367        spin_lock(&zram->wb_limit_lock);
 368        zram->bd_wb_limit = val;
 369        spin_unlock(&zram->wb_limit_lock);
 370        up_read(&zram->init_lock);
 371        ret = len;
 372
 373        return ret;
 374}
 375
 376static ssize_t writeback_limit_show(struct device *dev,
 377                struct device_attribute *attr, char *buf)
 378{
 379        u64 val;
 380        struct zram *zram = dev_to_zram(dev);
 381
 382        down_read(&zram->init_lock);
 383        spin_lock(&zram->wb_limit_lock);
 384        val = zram->bd_wb_limit;
 385        spin_unlock(&zram->wb_limit_lock);
 386        up_read(&zram->init_lock);
 387
 388        return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
 389}
 390
 391static void reset_bdev(struct zram *zram)
 392{
 393        struct block_device *bdev;
 394
 395        if (!zram->backing_dev)
 396                return;
 397
 398        bdev = zram->bdev;
 399        if (zram->old_block_size)
 400                set_blocksize(bdev, zram->old_block_size);
 401        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  402        /* hopefully filp_close() flushes all outstanding IO */
 403        filp_close(zram->backing_dev, NULL);
 404        zram->backing_dev = NULL;
 405        zram->old_block_size = 0;
 406        zram->bdev = NULL;
 407        zram->disk->queue->backing_dev_info->capabilities |=
 408                                BDI_CAP_SYNCHRONOUS_IO;
 409        kvfree(zram->bitmap);
 410        zram->bitmap = NULL;
 411}
 412
 413static ssize_t backing_dev_show(struct device *dev,
 414                struct device_attribute *attr, char *buf)
 415{
 416        struct zram *zram = dev_to_zram(dev);
 417        struct file *file = zram->backing_dev;
 418        char *p;
 419        ssize_t ret;
 420
 421        down_read(&zram->init_lock);
 422        if (!zram->backing_dev) {
 423                memcpy(buf, "none\n", 5);
 424                up_read(&zram->init_lock);
 425                return 5;
 426        }
 427
 428        p = file_path(file, buf, PAGE_SIZE - 1);
 429        if (IS_ERR(p)) {
 430                ret = PTR_ERR(p);
 431                goto out;
 432        }
 433
 434        ret = strlen(p);
 435        memmove(buf, p, ret);
 436        buf[ret++] = '\n';
 437out:
 438        up_read(&zram->init_lock);
 439        return ret;
 440}
 441
 442static ssize_t backing_dev_store(struct device *dev,
 443                struct device_attribute *attr, const char *buf, size_t len)
 444{
 445        char *file_name;
 446        size_t sz;
 447        struct file *backing_dev = NULL;
 448        struct inode *inode;
 449        struct address_space *mapping;
 450        unsigned int bitmap_sz, old_block_size = 0;
 451        unsigned long nr_pages, *bitmap = NULL;
 452        struct block_device *bdev = NULL;
 453        int err;
 454        struct zram *zram = dev_to_zram(dev);
 455
 456        file_name = kmalloc(PATH_MAX, GFP_KERNEL);
 457        if (!file_name)
 458                return -ENOMEM;
 459
 460        down_write(&zram->init_lock);
 461        if (init_done(zram)) {
 462                pr_info("Can't setup backing device for initialized device\n");
 463                err = -EBUSY;
 464                goto out;
 465        }
 466
 467        strlcpy(file_name, buf, PATH_MAX);
 468        /* ignore trailing newline */
 469        sz = strlen(file_name);
 470        if (sz > 0 && file_name[sz - 1] == '\n')
 471                file_name[sz - 1] = 0x00;
 472
 473        backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
 474        if (IS_ERR(backing_dev)) {
 475                err = PTR_ERR(backing_dev);
 476                backing_dev = NULL;
 477                goto out;
 478        }
 479
 480        mapping = backing_dev->f_mapping;
 481        inode = mapping->host;
 482
  483        /* Only block devices are supported at the moment */
 484        if (!S_ISBLK(inode->i_mode)) {
 485                err = -ENOTBLK;
 486                goto out;
 487        }
 488
 489        bdev = bdgrab(I_BDEV(inode));
 490        err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
 491        if (err < 0) {
 492                bdev = NULL;
 493                goto out;
 494        }
 495
 496        nr_pages = i_size_read(inode) >> PAGE_SHIFT;
 497        bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
 498        bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
 499        if (!bitmap) {
 500                err = -ENOMEM;
 501                goto out;
 502        }
 503
 504        old_block_size = block_size(bdev);
 505        err = set_blocksize(bdev, PAGE_SIZE);
 506        if (err)
 507                goto out;
 508
 509        reset_bdev(zram);
 510
 511        zram->old_block_size = old_block_size;
 512        zram->bdev = bdev;
 513        zram->backing_dev = backing_dev;
 514        zram->bitmap = bitmap;
 515        zram->nr_pages = nr_pages;
 516        /*
  517         * With the writeback feature, zram does asynchronous IO, so it is
  518         * no longer a synchronous device and the synchronous io flag must
  519         * be cleared. Otherwise, the upper layer (e.g., swap) could wait
  520         * for IO completion instead of submitting and returning, which
  521         * would make the system sluggish.
  522         * Furthermore, when the IO function returns (e.g., swap_readpage),
  523         * the upper layer assumes the IO is done and may free the page
  524         * while the IO is still in flight, causing a use-after-free.
 525         */
 526        zram->disk->queue->backing_dev_info->capabilities &=
 527                        ~BDI_CAP_SYNCHRONOUS_IO;
 528        up_write(&zram->init_lock);
 529
 530        pr_info("setup backing device %s\n", file_name);
 531        kfree(file_name);
 532
 533        return len;
 534out:
 535        if (bitmap)
 536                kvfree(bitmap);
 537
 538        if (bdev)
 539                blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 540
 541        if (backing_dev)
 542                filp_close(backing_dev, NULL);
 543
 544        up_write(&zram->init_lock);
 545
 546        kfree(file_name);
 547
 548        return err;
 549}
 550
 551static unsigned long alloc_block_bdev(struct zram *zram)
 552{
 553        unsigned long blk_idx = 1;
 554retry:
  555        /* skip bit 0 so a block index is never confused with zram.handle == 0 */
 556        blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
 557        if (blk_idx == zram->nr_pages)
 558                return 0;
 559
 560        if (test_and_set_bit(blk_idx, zram->bitmap))
 561                goto retry;
 562
 563        atomic64_inc(&zram->stats.bd_count);
 564        return blk_idx;
 565}
 566
 567static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
 568{
 569        int was_set;
 570
 571        was_set = test_and_clear_bit(blk_idx, zram->bitmap);
 572        WARN_ON_ONCE(!was_set);
 573        atomic64_dec(&zram->stats.bd_count);
 574}
 575
 576static void zram_page_end_io(struct bio *bio)
 577{
 578        struct page *page = bio_first_page_all(bio);
 579
 580        page_endio(page, op_is_write(bio_op(bio)),
 581                        blk_status_to_errno(bio->bi_status));
 582        bio_put(bio);
 583}
 584
 585/*
 586 * Returns 1 if the submission is successful.
 587 */
 588static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 589                        unsigned long entry, struct bio *parent)
 590{
 591        struct bio *bio;
 592
 593        bio = bio_alloc(GFP_ATOMIC, 1);
 594        if (!bio)
 595                return -ENOMEM;
 596
 597        bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 598        bio_set_dev(bio, zram->bdev);
 599        if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
 600                bio_put(bio);
 601                return -EIO;
 602        }
 603
 604        if (!parent) {
 605                bio->bi_opf = REQ_OP_READ;
 606                bio->bi_end_io = zram_page_end_io;
 607        } else {
 608                bio->bi_opf = parent->bi_opf;
 609                bio_chain(bio, parent);
 610        }
 611
 612        submit_bio(bio);
 613        return 1;
 614}
 615
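/* writeback_store() modes: write back incompressible (huge) or idle pages */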
 616#define HUGE_WRITEBACK 1
 617#define IDLE_WRITEBACK 2
 618
 619static ssize_t writeback_store(struct device *dev,
 620                struct device_attribute *attr, const char *buf, size_t len)
 621{
 622        struct zram *zram = dev_to_zram(dev);
 623        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 624        unsigned long index;
 625        struct bio bio;
 626        struct bio_vec bio_vec;
 627        struct page *page;
 628        ssize_t ret;
 629        int mode;
 630        unsigned long blk_idx = 0;
 631
 632        if (sysfs_streq(buf, "idle"))
 633                mode = IDLE_WRITEBACK;
 634        else if (sysfs_streq(buf, "huge"))
 635                mode = HUGE_WRITEBACK;
 636        else
 637                return -EINVAL;
 638
 639        down_read(&zram->init_lock);
 640        if (!init_done(zram)) {
 641                ret = -EINVAL;
 642                goto release_init_lock;
 643        }
 644
 645        if (!zram->backing_dev) {
 646                ret = -ENODEV;
 647                goto release_init_lock;
 648        }
 649
 650        page = alloc_page(GFP_KERNEL);
 651        if (!page) {
 652                ret = -ENOMEM;
 653                goto release_init_lock;
 654        }
 655
 656        for (index = 0; index < nr_pages; index++) {
 657                struct bio_vec bvec;
 658
 659                bvec.bv_page = page;
 660                bvec.bv_len = PAGE_SIZE;
 661                bvec.bv_offset = 0;
 662
 663                spin_lock(&zram->wb_limit_lock);
 664                if (zram->wb_limit_enable && !zram->bd_wb_limit) {
 665                        spin_unlock(&zram->wb_limit_lock);
 666                        ret = -EIO;
 667                        break;
 668                }
 669                spin_unlock(&zram->wb_limit_lock);
 670
 671                if (!blk_idx) {
 672                        blk_idx = alloc_block_bdev(zram);
 673                        if (!blk_idx) {
 674                                ret = -ENOSPC;
 675                                break;
 676                        }
 677                }
 678
 679                zram_slot_lock(zram, index);
 680                if (!zram_allocated(zram, index))
 681                        goto next;
 682
 683                if (zram_test_flag(zram, index, ZRAM_WB) ||
 684                                zram_test_flag(zram, index, ZRAM_SAME) ||
 685                                zram_test_flag(zram, index, ZRAM_UNDER_WB))
 686                        goto next;
 687
 688                if (mode == IDLE_WRITEBACK &&
 689                          !zram_test_flag(zram, index, ZRAM_IDLE))
 690                        goto next;
 691                if (mode == HUGE_WRITEBACK &&
 692                          !zram_test_flag(zram, index, ZRAM_HUGE))
 693                        goto next;
 694                /*
  695                 * Clearing ZRAM_UNDER_WB is the caller's duty.
  696                 * IOW, zram_free_page never clears it.
 697                 */
 698                zram_set_flag(zram, index, ZRAM_UNDER_WB);
  699                /* Needed for the hugepage writeback race check below */
 700                zram_set_flag(zram, index, ZRAM_IDLE);
 701                zram_slot_unlock(zram, index);
 702                if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
 703                        zram_slot_lock(zram, index);
 704                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 705                        zram_clear_flag(zram, index, ZRAM_IDLE);
 706                        zram_slot_unlock(zram, index);
 707                        continue;
 708                }
 709
 710                bio_init(&bio, &bio_vec, 1);
 711                bio_set_dev(&bio, zram->bdev);
 712                bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
 713                bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
 714
 715                bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
 716                                bvec.bv_offset);
 717                /*
  718                 * XXX: A single page IO is inefficient for writes,
  719                 * but it is not bad as a starter.
 720                 */
 721                ret = submit_bio_wait(&bio);
 722                if (ret) {
 723                        zram_slot_lock(zram, index);
 724                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 725                        zram_clear_flag(zram, index, ZRAM_IDLE);
 726                        zram_slot_unlock(zram, index);
 727                        continue;
 728                }
 729
 730                atomic64_inc(&zram->stats.bd_writes);
 731                /*
  732                 * We released zram_slot_lock, so we need to check whether the
  733                 * slot has changed. If the slot was freed, zram_allocated
  734                 * catches that easily.
  735                 * A subtler case is the slot being freed, reallocated and
  736                 * marked ZRAM_IDLE again. To close that race, idle_store does
  737                 * not mark a slot ZRAM_IDLE while it is ZRAM_UNDER_WB, so
  738                 * checking the ZRAM_IDLE bit here is sufficient.
 739                 */
 740                zram_slot_lock(zram, index);
 741                if (!zram_allocated(zram, index) ||
 742                          !zram_test_flag(zram, index, ZRAM_IDLE)) {
 743                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 744                        zram_clear_flag(zram, index, ZRAM_IDLE);
 745                        goto next;
 746                }
 747
 748                zram_free_page(zram, index);
 749                zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 750                zram_set_flag(zram, index, ZRAM_WB);
 751                zram_set_element(zram, index, blk_idx);
 752                blk_idx = 0;
 753                atomic64_inc(&zram->stats.pages_stored);
 754                spin_lock(&zram->wb_limit_lock);
 755                if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
 756                        zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
 757                spin_unlock(&zram->wb_limit_lock);
 758next:
 759                zram_slot_unlock(zram, index);
 760        }
 761
 762        if (blk_idx)
 763                free_block_bdev(zram, blk_idx);
 764        ret = len;
 765        __free_page(page);
 766release_init_lock:
 767        up_read(&zram->init_lock);
 768
 769        return ret;
 770}
 771
 772struct zram_work {
 773        struct work_struct work;
 774        struct zram *zram;
 775        unsigned long entry;
 776        struct bio *bio;
 777        struct bio_vec bvec;
 778};
 779
 780#if PAGE_SIZE != 4096
 781static void zram_sync_read(struct work_struct *work)
 782{
 783        struct zram_work *zw = container_of(work, struct zram_work, work);
 784        struct zram *zram = zw->zram;
 785        unsigned long entry = zw->entry;
 786        struct bio *bio = zw->bio;
 787
 788        read_from_bdev_async(zram, &zw->bvec, entry, bio);
 789}
 790
 791/*
  792 * The block layer wants only one ->make_request_fn to be active at a
  793 * time, so chaining this IO to the parent IO in the same context would
  794 * deadlock. To avoid that, the read is issued from a worker thread.
 795 */
 796static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 797                                unsigned long entry, struct bio *bio)
 798{
 799        struct zram_work work;
 800
 801        work.bvec = *bvec;
 802        work.zram = zram;
 803        work.entry = entry;
 804        work.bio = bio;
 805
 806        INIT_WORK_ONSTACK(&work.work, zram_sync_read);
 807        queue_work(system_unbound_wq, &work.work);
 808        flush_work(&work.work);
 809        destroy_work_on_stack(&work.work);
 810
 811        return 1;
 812}
 813#else
 814static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 815                                unsigned long entry, struct bio *bio)
 816{
 817        WARN_ON(1);
 818        return -EIO;
 819}
 820#endif
 821
 822static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 823                        unsigned long entry, struct bio *parent, bool sync)
 824{
 825        atomic64_inc(&zram->stats.bd_reads);
 826        if (sync)
 827                return read_from_bdev_sync(zram, bvec, entry, parent);
 828        else
 829                return read_from_bdev_async(zram, bvec, entry, parent);
 830}
 831#else
 832static inline void reset_bdev(struct zram *zram) {};
 833static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 834                        unsigned long entry, struct bio *parent, bool sync)
 835{
 836        return -EIO;
 837}
 838
 839static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
 840#endif
 841
 842#ifdef CONFIG_ZRAM_MEMORY_TRACKING
 843
 844static struct dentry *zram_debugfs_root;
 845
 846static void zram_debugfs_create(void)
 847{
 848        zram_debugfs_root = debugfs_create_dir("zram", NULL);
 849}
 850
 851static void zram_debugfs_destroy(void)
 852{
 853        debugfs_remove_recursive(zram_debugfs_root);
 854}
 855
 856static void zram_accessed(struct zram *zram, u32 index)
 857{
 858        zram_clear_flag(zram, index, ZRAM_IDLE);
 859        zram->table[index].ac_time = ktime_get_boottime();
 860}
 861
 862static ssize_t read_block_state(struct file *file, char __user *buf,
 863                                size_t count, loff_t *ppos)
 864{
 865        char *kbuf;
 866        ssize_t index, written = 0;
 867        struct zram *zram = file->private_data;
 868        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 869        struct timespec64 ts;
 870
 871        kbuf = kvmalloc(count, GFP_KERNEL);
 872        if (!kbuf)
 873                return -ENOMEM;
 874
 875        down_read(&zram->init_lock);
 876        if (!init_done(zram)) {
 877                up_read(&zram->init_lock);
 878                kvfree(kbuf);
 879                return -EINVAL;
 880        }
 881
 882        for (index = *ppos; index < nr_pages; index++) {
 883                int copied;
 884
 885                zram_slot_lock(zram, index);
 886                if (!zram_allocated(zram, index))
 887                        goto next;
 888
 889                ts = ktime_to_timespec64(zram->table[index].ac_time);
 890                copied = snprintf(kbuf + written, count,
 891                        "%12zd %12lld.%06lu %c%c%c%c\n",
 892                        index, (s64)ts.tv_sec,
 893                        ts.tv_nsec / NSEC_PER_USEC,
 894                        zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 895                        zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
 896                        zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
 897                        zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
 898
 899                if (count < copied) {
 900                        zram_slot_unlock(zram, index);
 901                        break;
 902                }
 903                written += copied;
 904                count -= copied;
 905next:
 906                zram_slot_unlock(zram, index);
 907                *ppos += 1;
 908        }
 909
 910        up_read(&zram->init_lock);
 911        if (copy_to_user(buf, kbuf, written))
 912                written = -EFAULT;
 913        kvfree(kbuf);
 914
 915        return written;
 916}
 917
 918static const struct file_operations proc_zram_block_state_op = {
 919        .open = simple_open,
 920        .read = read_block_state,
 921        .llseek = default_llseek,
 922};
 923
 924static void zram_debugfs_register(struct zram *zram)
 925{
 926        if (!zram_debugfs_root)
 927                return;
 928
 929        zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
 930                                                zram_debugfs_root);
 931        debugfs_create_file("block_state", 0400, zram->debugfs_dir,
 932                                zram, &proc_zram_block_state_op);
 933}
 934
 935static void zram_debugfs_unregister(struct zram *zram)
 936{
 937        debugfs_remove_recursive(zram->debugfs_dir);
 938}
 939#else
 940static void zram_debugfs_create(void) {};
 941static void zram_debugfs_destroy(void) {};
 942static void zram_accessed(struct zram *zram, u32 index)
 943{
 944        zram_clear_flag(zram, index, ZRAM_IDLE);
 945};
 946static void zram_debugfs_register(struct zram *zram) {};
 947static void zram_debugfs_unregister(struct zram *zram) {};
 948#endif
 949
 950/*
 951 * We switched to per-cpu streams and this attr is not needed anymore.
 952 * However, we will keep it around for some time, because:
 953 * a) we may revert per-cpu streams in the future
 954 * b) it's visible to user space and we need to follow our 2 years
 955 *    retirement rule; but we already have a number of 'soon to be
  956 *    altered' attrs, so max_comp_streams needs to wait for the next
 957 *    layoff cycle.
 958 */
 959static ssize_t max_comp_streams_show(struct device *dev,
 960                struct device_attribute *attr, char *buf)
 961{
 962        return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
 963}
 964
 965static ssize_t max_comp_streams_store(struct device *dev,
 966                struct device_attribute *attr, const char *buf, size_t len)
 967{
 968        return len;
 969}
 970
 971static ssize_t comp_algorithm_show(struct device *dev,
 972                struct device_attribute *attr, char *buf)
 973{
 974        size_t sz;
 975        struct zram *zram = dev_to_zram(dev);
 976
 977        down_read(&zram->init_lock);
 978        sz = zcomp_available_show(zram->compressor, buf);
 979        up_read(&zram->init_lock);
 980
 981        return sz;
 982}
 983
 984static ssize_t comp_algorithm_store(struct device *dev,
 985                struct device_attribute *attr, const char *buf, size_t len)
 986{
 987        struct zram *zram = dev_to_zram(dev);
 988        char compressor[ARRAY_SIZE(zram->compressor)];
 989        size_t sz;
 990
 991        strlcpy(compressor, buf, sizeof(compressor));
 992        /* ignore trailing newline */
 993        sz = strlen(compressor);
 994        if (sz > 0 && compressor[sz - 1] == '\n')
 995                compressor[sz - 1] = 0x00;
 996
 997        if (!zcomp_available_algorithm(compressor))
 998                return -EINVAL;
 999
1000        down_write(&zram->init_lock);
1001        if (init_done(zram)) {
1002                up_write(&zram->init_lock);
1003                pr_info("Can't change algorithm for initialized device\n");
1004                return -EBUSY;
1005        }
1006
1007        strcpy(zram->compressor, compressor);
1008        up_write(&zram->init_lock);
1009        return len;
1010}
1011
1012static ssize_t compact_store(struct device *dev,
1013                struct device_attribute *attr, const char *buf, size_t len)
1014{
1015        struct zram *zram = dev_to_zram(dev);
1016
1017        down_read(&zram->init_lock);
1018        if (!init_done(zram)) {
1019                up_read(&zram->init_lock);
1020                return -EINVAL;
1021        }
1022
1023        zs_compact(zram->mem_pool);
1024        up_read(&zram->init_lock);
1025
1026        return len;
1027}
1028
1029static ssize_t io_stat_show(struct device *dev,
1030                struct device_attribute *attr, char *buf)
1031{
1032        struct zram *zram = dev_to_zram(dev);
1033        ssize_t ret;
1034
1035        down_read(&zram->init_lock);
1036        ret = scnprintf(buf, PAGE_SIZE,
1037                        "%8llu %8llu %8llu %8llu\n",
1038                        (u64)atomic64_read(&zram->stats.failed_reads),
1039                        (u64)atomic64_read(&zram->stats.failed_writes),
1040                        (u64)atomic64_read(&zram->stats.invalid_io),
1041                        (u64)atomic64_read(&zram->stats.notify_free));
1042        up_read(&zram->init_lock);
1043
1044        return ret;
1045}
1046
1047static ssize_t mm_stat_show(struct device *dev,
1048                struct device_attribute *attr, char *buf)
1049{
1050        struct zram *zram = dev_to_zram(dev);
1051        struct zs_pool_stats pool_stats;
1052        u64 orig_size, mem_used = 0;
1053        long max_used;
1054        ssize_t ret;
1055
1056        memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1057
1058        down_read(&zram->init_lock);
1059        if (init_done(zram)) {
1060                mem_used = zs_get_total_pages(zram->mem_pool);
1061                zs_pool_stats(zram->mem_pool, &pool_stats);
1062        }
1063
1064        orig_size = atomic64_read(&zram->stats.pages_stored);
1065        max_used = atomic_long_read(&zram->stats.max_used_pages);
1066
1067        ret = scnprintf(buf, PAGE_SIZE,
1068                        "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
1069                        orig_size << PAGE_SHIFT,
1070                        (u64)atomic64_read(&zram->stats.compr_data_size),
1071                        mem_used << PAGE_SHIFT,
1072                        zram->limit_pages << PAGE_SHIFT,
1073                        max_used << PAGE_SHIFT,
1074                        (u64)atomic64_read(&zram->stats.same_pages),
1075                        pool_stats.pages_compacted,
1076                        (u64)atomic64_read(&zram->stats.huge_pages));
1077        up_read(&zram->init_lock);
1078
1079        return ret;
1080}
1081
1082#ifdef CONFIG_ZRAM_WRITEBACK
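/* bd_stat counters are kept in pages; FOUR_K() reports them in 4K-block units */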
1083#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1084static ssize_t bd_stat_show(struct device *dev,
1085                struct device_attribute *attr, char *buf)
1086{
1087        struct zram *zram = dev_to_zram(dev);
1088        ssize_t ret;
1089
1090        down_read(&zram->init_lock);
1091        ret = scnprintf(buf, PAGE_SIZE,
1092                "%8llu %8llu %8llu\n",
1093                        FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1094                        FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1095                        FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1096        up_read(&zram->init_lock);
1097
1098        return ret;
1099}
1100#endif
1101
1102static ssize_t debug_stat_show(struct device *dev,
1103                struct device_attribute *attr, char *buf)
1104{
1105        int version = 1;
1106        struct zram *zram = dev_to_zram(dev);
1107        ssize_t ret;
1108
1109        down_read(&zram->init_lock);
1110        ret = scnprintf(buf, PAGE_SIZE,
1111                        "version: %d\n%8llu %8llu\n",
1112                        version,
1113                        (u64)atomic64_read(&zram->stats.writestall),
1114                        (u64)atomic64_read(&zram->stats.miss_free));
1115        up_read(&zram->init_lock);
1116
1117        return ret;
1118}
1119
1120static DEVICE_ATTR_RO(io_stat);
1121static DEVICE_ATTR_RO(mm_stat);
1122#ifdef CONFIG_ZRAM_WRITEBACK
1123static DEVICE_ATTR_RO(bd_stat);
1124#endif
1125static DEVICE_ATTR_RO(debug_stat);
1126
1127static void zram_meta_free(struct zram *zram, u64 disksize)
1128{
1129        size_t num_pages = disksize >> PAGE_SHIFT;
1130        size_t index;
1131
1132        /* Free all pages that are still in this zram device */
1133        for (index = 0; index < num_pages; index++)
1134                zram_free_page(zram, index);
1135
1136        zs_destroy_pool(zram->mem_pool);
1137        vfree(zram->table);
1138}
1139
1140static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1141{
1142        size_t num_pages;
1143
1144        num_pages = disksize >> PAGE_SHIFT;
1145        zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1146        if (!zram->table)
1147                return false;
1148
1149        zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1150        if (!zram->mem_pool) {
1151                vfree(zram->table);
1152                return false;
1153        }
1154
1155        if (!huge_class_size)
1156                huge_class_size = zs_huge_class_size(zram->mem_pool);
1157        return true;
1158}
1159
1160/*
 1161 * To protect against concurrent access to the same index entry, the
 1162 * caller should hold this table entry's bit_spinlock to indicate
 1163 * that the entry is being accessed.
1164 */
1165static void zram_free_page(struct zram *zram, size_t index)
1166{
1167        unsigned long handle;
1168
1169#ifdef CONFIG_ZRAM_MEMORY_TRACKING
1170        zram->table[index].ac_time = 0;
1171#endif
1172        if (zram_test_flag(zram, index, ZRAM_IDLE))
1173                zram_clear_flag(zram, index, ZRAM_IDLE);
1174
1175        if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1176                zram_clear_flag(zram, index, ZRAM_HUGE);
1177                atomic64_dec(&zram->stats.huge_pages);
1178        }
1179
1180        if (zram_test_flag(zram, index, ZRAM_WB)) {
1181                zram_clear_flag(zram, index, ZRAM_WB);
1182                free_block_bdev(zram, zram_get_element(zram, index));
1183                goto out;
1184        }
1185
1186        /*
 1187         * No memory is allocated for same-element-filled pages.
 1188         * Simply clear the same-page flag.
1189         */
1190        if (zram_test_flag(zram, index, ZRAM_SAME)) {
1191                zram_clear_flag(zram, index, ZRAM_SAME);
1192                atomic64_dec(&zram->stats.same_pages);
1193                goto out;
1194        }
1195
1196        handle = zram_get_handle(zram, index);
1197        if (!handle)
1198                return;
1199
1200        zs_free(zram->mem_pool, handle);
1201
1202        atomic64_sub(zram_get_obj_size(zram, index),
1203                        &zram->stats.compr_data_size);
1204out:
1205        atomic64_dec(&zram->stats.pages_stored);
1206        zram_set_handle(zram, index, 0);
1207        zram_set_obj_size(zram, index, 0);
1208        WARN_ON_ONCE(zram->table[index].flags &
1209                ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1210}
1211
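/*
 * Read one page worth of data for @index into @page: written-back slots
 * are read from the backing device, same-filled slots are reconstructed
 * by filling, and everything else is copied or decompressed from zsmalloc.
 */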
1212static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1213                                struct bio *bio, bool partial_io)
1214{
1215        int ret;
1216        unsigned long handle;
1217        unsigned int size;
1218        void *src, *dst;
1219
1220        zram_slot_lock(zram, index);
1221        if (zram_test_flag(zram, index, ZRAM_WB)) {
1222                struct bio_vec bvec;
1223
1224                zram_slot_unlock(zram, index);
1225
1226                bvec.bv_page = page;
1227                bvec.bv_len = PAGE_SIZE;
1228                bvec.bv_offset = 0;
1229                return read_from_bdev(zram, &bvec,
1230                                zram_get_element(zram, index),
1231                                bio, partial_io);
1232        }
1233
1234        handle = zram_get_handle(zram, index);
1235        if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1236                unsigned long value;
1237                void *mem;
1238
1239                value = handle ? zram_get_element(zram, index) : 0;
1240                mem = kmap_atomic(page);
1241                zram_fill_page(mem, PAGE_SIZE, value);
1242                kunmap_atomic(mem);
1243                zram_slot_unlock(zram, index);
1244                return 0;
1245        }
1246
1247        size = zram_get_obj_size(zram, index);
1248
1249        src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1250        if (size == PAGE_SIZE) {
1251                dst = kmap_atomic(page);
1252                memcpy(dst, src, PAGE_SIZE);
1253                kunmap_atomic(dst);
1254                ret = 0;
1255        } else {
1256                struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1257
1258                dst = kmap_atomic(page);
1259                ret = zcomp_decompress(zstrm, src, size, dst);
1260                kunmap_atomic(dst);
1261                zcomp_stream_put(zram->comp);
1262        }
1263        zs_unmap_object(zram->mem_pool, handle);
1264        zram_slot_unlock(zram, index);
1265
1266        /* Should NEVER happen. Return bio error if it does. */
1267        if (unlikely(ret))
1268                pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1269
1270        return ret;
1271}
1272
1273static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1274                                u32 index, int offset, struct bio *bio)
1275{
1276        int ret;
1277        struct page *page;
1278
1279        page = bvec->bv_page;
1280        if (is_partial_io(bvec)) {
1281                /* Use a temporary buffer to decompress the page */
1282                page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1283                if (!page)
1284                        return -ENOMEM;
1285        }
1286
1287        ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1288        if (unlikely(ret))
1289                goto out;
1290
1291        if (is_partial_io(bvec)) {
1292                void *dst = kmap_atomic(bvec->bv_page);
1293                void *src = kmap_atomic(page);
1294
1295                memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1296                kunmap_atomic(src);
1297                kunmap_atomic(dst);
1298        }
1299out:
1300        if (is_partial_io(bvec))
1301                __free_page(page);
1302
1303        return ret;
1304}
1305
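/*
 * Compress and store one full page at @index: same-filled pages record
 * only the fill value, pages that do not compress below huge_class_size
 * are stored uncompressed (ZRAM_HUGE), everything else is stored as a
 * compressed zsmalloc object.
 */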
1306static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1307                                u32 index, struct bio *bio)
1308{
1309        int ret = 0;
1310        unsigned long alloced_pages;
1311        unsigned long handle = 0;
1312        unsigned int comp_len = 0;
1313        void *src, *dst, *mem;
1314        struct zcomp_strm *zstrm;
1315        struct page *page = bvec->bv_page;
1316        unsigned long element = 0;
1317        enum zram_pageflags flags = 0;
1318
1319        mem = kmap_atomic(page);
1320        if (page_same_filled(mem, &element)) {
1321                kunmap_atomic(mem);
1322                /* Free memory associated with this sector now. */
1323                flags = ZRAM_SAME;
1324                atomic64_inc(&zram->stats.same_pages);
1325                goto out;
1326        }
1327        kunmap_atomic(mem);
1328
1329compress_again:
1330        zstrm = zcomp_stream_get(zram->comp);
1331        src = kmap_atomic(page);
1332        ret = zcomp_compress(zstrm, src, &comp_len);
1333        kunmap_atomic(src);
1334
1335        if (unlikely(ret)) {
1336                zcomp_stream_put(zram->comp);
1337                pr_err("Compression failed! err=%d\n", ret);
1338                zs_free(zram->mem_pool, handle);
1339                return ret;
1340        }
1341
1342        if (comp_len >= huge_class_size)
1343                comp_len = PAGE_SIZE;
1344        /*
 1345         * handle allocation has two paths:
 1346         * a) the fast path runs with preemption disabled (for the
 1347         *  per-cpu streams) and with the __GFP_DIRECT_RECLAIM bit clear,
 1348         *  since we cannot sleep;
 1349         * b) the slow path enables preemption and attempts to allocate
 1350         *  the page with the __GFP_DIRECT_RECLAIM bit set. We have to
 1351         *  put the per-cpu compression stream and, thus, re-do the
 1352         *  compression once the handle is allocated.
 1353         *
 1354         * If we have a non-NULL handle here then we are coming
 1355         * from the slow path and the handle has already been allocated.
1356         */
1357        if (!handle)
1358                handle = zs_malloc(zram->mem_pool, comp_len,
1359                                __GFP_KSWAPD_RECLAIM |
1360                                __GFP_NOWARN |
1361                                __GFP_HIGHMEM |
1362                                __GFP_MOVABLE);
1363        if (!handle) {
1364                zcomp_stream_put(zram->comp);
1365                atomic64_inc(&zram->stats.writestall);
1366                handle = zs_malloc(zram->mem_pool, comp_len,
1367                                GFP_NOIO | __GFP_HIGHMEM |
1368                                __GFP_MOVABLE);
1369                if (handle)
1370                        goto compress_again;
1371                return -ENOMEM;
1372        }
1373
1374        alloced_pages = zs_get_total_pages(zram->mem_pool);
1375        update_used_max(zram, alloced_pages);
1376
1377        if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1378                zcomp_stream_put(zram->comp);
1379                zs_free(zram->mem_pool, handle);
1380                return -ENOMEM;
1381        }
1382
1383        dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1384
1385        src = zstrm->buffer;
1386        if (comp_len == PAGE_SIZE)
1387                src = kmap_atomic(page);
1388        memcpy(dst, src, comp_len);
1389        if (comp_len == PAGE_SIZE)
1390                kunmap_atomic(src);
1391
1392        zcomp_stream_put(zram->comp);
1393        zs_unmap_object(zram->mem_pool, handle);
1394        atomic64_add(comp_len, &zram->stats.compr_data_size);
1395out:
1396        /*
1397         * Free memory associated with this sector
1398         * before overwriting unused sectors.
1399         */
1400        zram_slot_lock(zram, index);
1401        zram_free_page(zram, index);
1402
1403        if (comp_len == PAGE_SIZE) {
1404                zram_set_flag(zram, index, ZRAM_HUGE);
1405                atomic64_inc(&zram->stats.huge_pages);
1406        }
1407
1408        if (flags) {
1409                zram_set_flag(zram, index, flags);
1410                zram_set_element(zram, index, element);
1411        }  else {
1412                zram_set_handle(zram, index, handle);
1413                zram_set_obj_size(zram, index, comp_len);
1414        }
1415        zram_slot_unlock(zram, index);
1416
1417        /* Update stats */
1418        atomic64_inc(&zram->stats.pages_stored);
1419        return ret;
1420}
1421
1422static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1423                                u32 index, int offset, struct bio *bio)
1424{
1425        int ret;
1426        struct page *page = NULL;
1427        void *src;
1428        struct bio_vec vec;
1429
1430        vec = *bvec;
1431        if (is_partial_io(bvec)) {
1432                void *dst;
1433                /*
1434                 * This is a partial IO. We need to read the full page
1435                 * before to write the changes.
 1436                 * before writing the changes.
1437                page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1438                if (!page)
1439                        return -ENOMEM;
1440
1441                ret = __zram_bvec_read(zram, page, index, bio, true);
1442                if (ret)
1443                        goto out;
1444
1445                src = kmap_atomic(bvec->bv_page);
1446                dst = kmap_atomic(page);
1447                memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1448                kunmap_atomic(dst);
1449                kunmap_atomic(src);
1450
1451                vec.bv_page = page;
1452                vec.bv_len = PAGE_SIZE;
1453                vec.bv_offset = 0;
1454        }
1455
1456        ret = __zram_bvec_write(zram, &vec, index, bio);
1457out:
1458        if (is_partial_io(bvec))
1459                __free_page(page);
1460        return ret;
1461}
1462
1463/*
1464 * zram_bio_discard - handler on discard request
1465 * @index: physical block index in PAGE_SIZE units
1466 * @offset: byte offset within physical block
1467 */
1468static void zram_bio_discard(struct zram *zram, u32 index,
1469                             int offset, struct bio *bio)
1470{
1471        size_t n = bio->bi_iter.bi_size;
1472
1473        /*
1474         * zram manages data in physical block size units. Because logical block
 1475         * size isn't identical to the physical block size on some architectures, we
1476         * could get a discard request pointing to a specific offset within a
1477         * certain physical block.  Although we can handle this request by
 1478         * reading that physical block and decompressing and partially zeroing
1479         * and re-compressing and then re-storing it, this isn't reasonable
1480         * because our intent with a discard request is to save memory.  So
1481         * skipping this logical block is appropriate here.
1482         */
1483        if (offset) {
1484                if (n <= (PAGE_SIZE - offset))
1485                        return;
1486
1487                n -= (PAGE_SIZE - offset);
1488                index++;
1489        }
1490
1491        while (n >= PAGE_SIZE) {
1492                zram_slot_lock(zram, index);
1493                zram_free_page(zram, index);
1494                zram_slot_unlock(zram, index);
1495                atomic64_inc(&zram->stats.notify_free);
1496                index++;
1497                n -= PAGE_SIZE;
1498        }
1499}
1500
1501/*
 1502 * Returns a negative errno on failure. Otherwise returns 0 or 1:
 1503 * 0 if the IO request was completed synchronously,
 1504 * 1 if the IO request was successfully submitted (asynchronously).
1505 */
1506static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1507                        int offset, unsigned int op, struct bio *bio)
1508{
1509        unsigned long start_time = jiffies;
1510        struct request_queue *q = zram->disk->queue;
1511        int ret;
1512
1513        generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
1514                        &zram->disk->part0);
1515
1516        if (!op_is_write(op)) {
1517                atomic64_inc(&zram->stats.num_reads);
1518                ret = zram_bvec_read(zram, bvec, index, offset, bio);
1519                flush_dcache_page(bvec->bv_page);
1520        } else {
1521                atomic64_inc(&zram->stats.num_writes);
1522                ret = zram_bvec_write(zram, bvec, index, offset, bio);
1523        }
1524
1525        generic_end_io_acct(q, op, &zram->disk->part0, start_time);
1526
1527        zram_slot_lock(zram, index);
1528        zram_accessed(zram, index);
1529        zram_slot_unlock(zram, index);
1530
1531        if (unlikely(ret < 0)) {
1532                if (!op_is_write(op))
1533                        atomic64_inc(&zram->stats.failed_reads);
1534                else
1535                        atomic64_inc(&zram->stats.failed_writes);
1536        }
1537
1538        return ret;
1539}
1540
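/*
 * Split the bio into page-sized (or smaller, for partial IO) chunks and
 * hand each one to zram_bvec_rw(); discard and write-zeroes requests are
 * handled by freeing the covered slots.
 */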
1541static void __zram_make_request(struct zram *zram, struct bio *bio)
1542{
1543        int offset;
1544        u32 index;
1545        struct bio_vec bvec;
1546        struct bvec_iter iter;
1547
1548        index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1549        offset = (bio->bi_iter.bi_sector &
1550                  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1551
1552        switch (bio_op(bio)) {
1553        case REQ_OP_DISCARD:
1554        case REQ_OP_WRITE_ZEROES:
1555                zram_bio_discard(zram, index, offset, bio);
1556                bio_endio(bio);
1557                return;
1558        default:
1559                break;
1560        }
1561
1562        bio_for_each_segment(bvec, bio, iter) {
1563                struct bio_vec bv = bvec;
1564                unsigned int unwritten = bvec.bv_len;
1565
1566                do {
1567                        bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1568                                                        unwritten);
1569                        if (zram_bvec_rw(zram, &bv, index, offset,
1570                                         bio_op(bio), bio) < 0)
1571                                goto out;
1572
1573                        bv.bv_offset += bv.bv_len;
1574                        unwritten -= bv.bv_len;
1575
1576                        update_position(&index, &offset, &bv);
1577                } while (unwritten);
1578        }
1579
1580        bio_endio(bio);
1581        return;
1582
1583out:
1584        bio_io_error(bio);
1585}
1586
1587/*
1588 * Handler function for all zram I/O requests.
1589 */
1590static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
1591{
1592        struct zram *zram = queue->queuedata;
1593
1594        if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1595                                        bio->bi_iter.bi_size)) {
1596                atomic64_inc(&zram->stats.invalid_io);
1597                goto error;
1598        }
1599
1600        __zram_make_request(zram, bio);
1601        return BLK_QC_T_NONE;
1602
1603error:
1604        bio_io_error(bio);
1605        return BLK_QC_T_NONE;
1606}
1607
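/*
 * Called when a swap slot backed by this device is freed; drop the stored
 * page if the slot lock can be taken without blocking, otherwise count a
 * miss and let the data be freed later (e.g. on overwrite or reset).
 */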
1608static void zram_slot_free_notify(struct block_device *bdev,
1609                                unsigned long index)
1610{
1611        struct zram *zram;
1612
1613        zram = bdev->bd_disk->private_data;
1614
1615        atomic64_inc(&zram->stats.notify_free);
1616        if (!zram_slot_trylock(zram, index)) {
1617                atomic64_inc(&zram->stats.miss_free);
1618                return;
1619        }
1620
1621        zram_free_page(zram, index);
1622        zram_slot_unlock(zram, index);
1623}
1624
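    /*
     * Synchronous single-page entry point (->rw_page). Builds a one-page
     * bio_vec and reuses zram_bvec_rw(); transparent huge pages are not
     * supported.
     */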
1625static int zram_rw_page(struct block_device *bdev, sector_t sector,
1626                       struct page *page, unsigned int op)
1627{
1628        int offset, ret;
1629        u32 index;
1630        struct zram *zram;
1631        struct bio_vec bv;
1632
1633        if (PageTransHuge(page))
1634                return -ENOTSUPP;
1635        zram = bdev->bd_disk->private_data;
1636
1637        if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1638                atomic64_inc(&zram->stats.invalid_io);
1639                ret = -EINVAL;
1640                goto out;
1641        }
1642
1643        index = sector >> SECTORS_PER_PAGE_SHIFT;
1644        offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1645
1646        bv.bv_page = page;
1647        bv.bv_len = PAGE_SIZE;
1648        bv.bv_offset = 0;
1649
1650        ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1651out:
1652        /*
1653         * If the I/O fails, just return the error (i.e. non-zero) without
1654         * calling page_endio.
1655         * The callers of rw_page (e.g., swap_readpage, __swap_writepage)
1656         * will then resubmit the I/O as a bio request, and
1657         * bio->bi_end_io does the error handling
1658         * (e.g., SetPageError, set_page_dirty and other cleanup).
1659         */
1660        if (unlikely(ret < 0))
1661                return ret;
1662
1663        switch (ret) {
1664        case 0:
1665                page_endio(page, op_is_write(op), 0);
1666                break;
1667        case 1:
1668                ret = 0;
1669                break;
1670        default:
1671                WARN_ON(1);
1672        }
1673        return ret;
1674}
1675
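    /*
     * Bring the device back to its uninitialized state: drop the memory
     * limit and capacity, free the per-slot metadata, clear the stats,
     * destroy the compression backend and detach any writeback backing
     * device. Serialized against concurrent init via init_lock.
     */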
1676static void zram_reset_device(struct zram *zram)
1677{
1678        struct zcomp *comp;
1679        u64 disksize;
1680
1681        down_write(&zram->init_lock);
1682
1683        zram->limit_pages = 0;
1684
1685        if (!init_done(zram)) {
1686                up_write(&zram->init_lock);
1687                return;
1688        }
1689
1690        comp = zram->comp;
1691        disksize = zram->disksize;
1692        zram->disksize = 0;
1693
1694        set_capacity(zram->disk, 0);
1695        part_stat_set_all(&zram->disk->part0, 0);
1696
1697        up_write(&zram->init_lock);
1698        /* I/O operations on all CPUs are done, so it's safe to free */
1699        zram_meta_free(zram, disksize);
1700        memset(&zram->stats, 0, sizeof(zram->stats));
1701        zcomp_destroy(comp);
1702        reset_bdev(zram);
1703}
1704
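    /*
     * Set the virtual disk size and bring the device up: allocate the
     * per-slot metadata, create the compression backend and publish the
     * new capacity, e.g. "echo 1G > /sys/block/zram0/disksize". The size
     * of an initialized device cannot be changed without a reset.
     */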
1705static ssize_t disksize_store(struct device *dev,
1706                struct device_attribute *attr, const char *buf, size_t len)
1707{
1708        u64 disksize;
1709        struct zcomp *comp;
1710        struct zram *zram = dev_to_zram(dev);
1711        int err;
1712
1713        disksize = memparse(buf, NULL);
1714        if (!disksize)
1715                return -EINVAL;
1716
1717        down_write(&zram->init_lock);
1718        if (init_done(zram)) {
1719                pr_info("Cannot change disksize for initialized device\n");
1720                err = -EBUSY;
1721                goto out_unlock;
1722        }
1723
1724        disksize = PAGE_ALIGN(disksize);
1725        if (!zram_meta_alloc(zram, disksize)) {
1726                err = -ENOMEM;
1727                goto out_unlock;
1728        }
1729
1730        comp = zcomp_create(zram->compressor);
1731        if (IS_ERR(comp)) {
1732                pr_err("Cannot initialise %s compressing backend\n",
1733                                zram->compressor);
1734                err = PTR_ERR(comp);
1735                goto out_free_meta;
1736        }
1737
1738        zram->comp = comp;
1739        zram->disksize = disksize;
1740        set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1741
1742        revalidate_disk(zram->disk);
1743        up_write(&zram->init_lock);
1744
1745        return len;
1746
1747out_free_meta:
1748        zram_meta_free(zram, disksize);
1749out_unlock:
1750        up_write(&zram->init_lock);
1751        return err;
1752}
1753
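    /*
     * "echo 1 > /sys/block/zram0/reset" returns the device to its
     * just-created state. The device is temporarily claimed so no new
     * opens can race with the reset; an open or already-claimed device
     * is refused with -EBUSY.
     */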
1754static ssize_t reset_store(struct device *dev,
1755                struct device_attribute *attr, const char *buf, size_t len)
1756{
1757        int ret;
1758        unsigned short do_reset;
1759        struct zram *zram;
1760        struct block_device *bdev;
1761
1762        ret = kstrtou16(buf, 10, &do_reset);
1763        if (ret)
1764                return ret;
1765
1766        if (!do_reset)
1767                return -EINVAL;
1768
1769        zram = dev_to_zram(dev);
1770        bdev = bdget_disk(zram->disk, 0);
1771        if (!bdev)
1772                return -ENOMEM;
1773
1774        mutex_lock(&bdev->bd_mutex);
1775        /* Do not reset an active or claimed device */
1776        if (bdev->bd_openers || zram->claim) {
1777                mutex_unlock(&bdev->bd_mutex);
1778                bdput(bdev);
1779                return -EBUSY;
1780        }
1781
1782        /* From now on, no one can open /dev/zram[0-9] */
1783        zram->claim = true;
1784        mutex_unlock(&bdev->bd_mutex);
1785
1786        /* Make sure all pending I/O is finished */
1787        fsync_bdev(bdev);
1788        zram_reset_device(zram);
1789        revalidate_disk(zram->disk);
1790        bdput(bdev);
1791
1792        mutex_lock(&bdev->bd_mutex);
1793        zram->claim = false;
1794        mutex_unlock(&bdev->bd_mutex);
1795
1796        return len;
1797}
1798
1799static int zram_open(struct block_device *bdev, fmode_t mode)
1800{
1801        int ret = 0;
1802        struct zram *zram;
1803
1804        WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1805
1806        zram = bdev->bd_disk->private_data;
1807        /* zram was claimed for reset, so fail the open request */
1808        if (zram->claim)
1809                ret = -EBUSY;
1810
1811        return ret;
1812}
1813
1814static const struct block_device_operations zram_devops = {
1815        .open = zram_open,
1816        .swap_slot_free_notify = zram_slot_free_notify,
1817        .rw_page = zram_rw_page,
1818        .owner = THIS_MODULE
1819};
1820
1821static DEVICE_ATTR_WO(compact);
1822static DEVICE_ATTR_RW(disksize);
1823static DEVICE_ATTR_RO(initstate);
1824static DEVICE_ATTR_WO(reset);
1825static DEVICE_ATTR_WO(mem_limit);
1826static DEVICE_ATTR_WO(mem_used_max);
1827static DEVICE_ATTR_WO(idle);
1828static DEVICE_ATTR_RW(max_comp_streams);
1829static DEVICE_ATTR_RW(comp_algorithm);
1830#ifdef CONFIG_ZRAM_WRITEBACK
1831static DEVICE_ATTR_RW(backing_dev);
1832static DEVICE_ATTR_WO(writeback);
1833static DEVICE_ATTR_RW(writeback_limit);
1834static DEVICE_ATTR_RW(writeback_limit_enable);
1835#endif
1836
1837static struct attribute *zram_disk_attrs[] = {
1838        &dev_attr_disksize.attr,
1839        &dev_attr_initstate.attr,
1840        &dev_attr_reset.attr,
1841        &dev_attr_compact.attr,
1842        &dev_attr_mem_limit.attr,
1843        &dev_attr_mem_used_max.attr,
1844        &dev_attr_idle.attr,
1845        &dev_attr_max_comp_streams.attr,
1846        &dev_attr_comp_algorithm.attr,
1847#ifdef CONFIG_ZRAM_WRITEBACK
1848        &dev_attr_backing_dev.attr,
1849        &dev_attr_writeback.attr,
1850        &dev_attr_writeback_limit.attr,
1851        &dev_attr_writeback_limit_enable.attr,
1852#endif
1853        &dev_attr_io_stat.attr,
1854        &dev_attr_mm_stat.attr,
1855#ifdef CONFIG_ZRAM_WRITEBACK
1856        &dev_attr_bd_stat.attr,
1857#endif
1858        &dev_attr_debug_stat.attr,
1859        NULL,
1860};
1861
1862static const struct attribute_group zram_disk_attr_group = {
1863        .attrs = zram_disk_attrs,
1864};
1865
1866static const struct attribute_group *zram_disk_attr_groups[] = {
1867        &zram_disk_attr_group,
1868        NULL,
1869};
1870
1871/*
1872 * Allocate and initialize a new zram device. The function returns
1873 * a device_id >= 0 upon success, and a negative value otherwise.
1874 */
1875static int zram_add(void)
1876{
1877        struct zram *zram;
1878        struct request_queue *queue;
1879        int ret, device_id;
1880
1881        zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1882        if (!zram)
1883                return -ENOMEM;
1884
1885        ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1886        if (ret < 0)
1887                goto out_free_dev;
1888        device_id = ret;
1889
1890        init_rwsem(&zram->init_lock);
1891#ifdef CONFIG_ZRAM_WRITEBACK
1892        spin_lock_init(&zram->wb_limit_lock);
1893#endif
1894        queue = blk_alloc_queue(GFP_KERNEL);
1895        if (!queue) {
1896                pr_err("Error allocating disk queue for device %d\n",
1897                        device_id);
1898                ret = -ENOMEM;
1899                goto out_free_idr;
1900        }
1901
1902        blk_queue_make_request(queue, zram_make_request);
1903
1904        /* gendisk structure */
1905        zram->disk = alloc_disk(1);
1906        if (!zram->disk) {
1907                pr_err("Error allocating disk structure for device %d\n",
1908                        device_id);
1909                ret = -ENOMEM;
1910                goto out_free_queue;
1911        }
1912
1913        zram->disk->major = zram_major;
1914        zram->disk->first_minor = device_id;
1915        zram->disk->fops = &zram_devops;
1916        zram->disk->queue = queue;
1917        zram->disk->queue->queuedata = zram;
1918        zram->disk->private_data = zram;
1919        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1920
1921        /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
1922        set_capacity(zram->disk, 0);
1923        /* zram devices sort of resemble non-rotational disks */
1924        blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1925        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1926
1927        /*
1928         * Ensure that we always get PAGE_SIZE-aligned
1929         * and n*PAGE_SIZE-sized I/O requests.
1930         */
1931        blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1932        blk_queue_logical_block_size(zram->disk->queue,
1933                                        ZRAM_LOGICAL_BLOCK_SIZE);
1934        blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1935        blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1936        zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1937        blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1938        blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1939
1940        /*
1941         * zram_bio_discard() will clear all logical blocks if the logical
1942         * block size is identical to the physical block size (PAGE_SIZE).
1943         * But if they differ, we skip discarding the parts of logical
1944         * blocks in the request range that aren't aligned to the physical
1945         * block size, so we can't guarantee that all discarded logical
1946         * blocks are zeroed.
1947         */
1948        if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1949                blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1950
1951        zram->disk->queue->backing_dev_info->capabilities |=
1952                        (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1953        device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
1954
1955        strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1956
1957        zram_debugfs_register(zram);
1958        pr_info("Added device: %s\n", zram->disk->disk_name);
1959        return device_id;
1960
1961out_free_queue:
1962        blk_cleanup_queue(queue);
1963out_free_idr:
1964        idr_remove(&zram_index_idr, device_id);
1965out_free_dev:
1966        kfree(zram);
1967        return ret;
1968}
1969
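    /*
     * Destroy a device created by zram_add(). As in reset_store(), the
     * device is claimed first, and removal of an open or already-claimed
     * device fails with -EBUSY.
     */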
1970static int zram_remove(struct zram *zram)
1971{
1972        struct block_device *bdev;
1973
1974        bdev = bdget_disk(zram->disk, 0);
1975        if (!bdev)
1976                return -ENOMEM;
1977
1978        mutex_lock(&bdev->bd_mutex);
1979        if (bdev->bd_openers || zram->claim) {
1980                mutex_unlock(&bdev->bd_mutex);
1981                bdput(bdev);
1982                return -EBUSY;
1983        }
1984
1985        zram->claim = true;
1986        mutex_unlock(&bdev->bd_mutex);
1987
1988        zram_debugfs_unregister(zram);
1989
1990        /* Make sure all pending I/O is finished */
1991        fsync_bdev(bdev);
1992        zram_reset_device(zram);
1993        bdput(bdev);
1994
1995        pr_info("Removed device: %s\n", zram->disk->disk_name);
1996
1997        del_gendisk(zram->disk);
1998        blk_cleanup_queue(zram->disk->queue);
1999        put_disk(zram->disk);
2000        kfree(zram);
2001        return 0;
2002}
2003
2004/* zram-control sysfs attributes */
2005
2006/*
2007 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
2008 * the sense that reading from this file does alter the state of your
2009 * system -- it creates a new un-initialized zram device and returns that
2010 * device's device_id (or an error code if it fails to create a new device).
2011 */
2012static ssize_t hot_add_show(struct class *class,
2013                        struct class_attribute *attr,
2014                        char *buf)
2015{
2016        int ret;
2017
2018        mutex_lock(&zram_index_mutex);
2019        ret = zram_add();
2020        mutex_unlock(&zram_index_mutex);
2021
2022        if (ret < 0)
2023                return ret;
2024        return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2025}
2026static CLASS_ATTR_RO(hot_add);
2027
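    /*
     * Remove a specific device by id, e.g.
     * "echo 4 > /sys/class/zram-control/hot_remove". Removal of a device
     * that is still open (or being reset) fails with -EBUSY.
     */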
2028static ssize_t hot_remove_store(struct class *class,
2029                        struct class_attribute *attr,
2030                        const char *buf,
2031                        size_t count)
2032{
2033        struct zram *zram;
2034        int ret, dev_id;
2035
2036        /* dev_id is gendisk->first_minor, which is `int' */
2037        ret = kstrtoint(buf, 10, &dev_id);
2038        if (ret)
2039                return ret;
2040        if (dev_id < 0)
2041                return -EINVAL;
2042
2043        mutex_lock(&zram_index_mutex);
2044
2045        zram = idr_find(&zram_index_idr, dev_id);
2046        if (zram) {
2047                ret = zram_remove(zram);
2048                if (!ret)
2049                        idr_remove(&zram_index_idr, dev_id);
2050        } else {
2051                ret = -ENODEV;
2052        }
2053
2054        mutex_unlock(&zram_index_mutex);
2055        return ret ? ret : count;
2056}
2057static CLASS_ATTR_WO(hot_remove);
2058
2059static struct attribute *zram_control_class_attrs[] = {
2060        &class_attr_hot_add.attr,
2061        &class_attr_hot_remove.attr,
2062        NULL,
2063};
2064ATTRIBUTE_GROUPS(zram_control_class);
2065
2066static struct class zram_control_class = {
2067        .name           = "zram-control",
2068        .owner          = THIS_MODULE,
2069        .class_groups   = zram_control_class_groups,
2070};
2071
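    /* idr_for_each() callback: tear down every remaining device on module exit */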
2072static int zram_remove_cb(int id, void *ptr, void *data)
2073{
2074        zram_remove(ptr);
2075        return 0;
2076}
2077
2078static void destroy_devices(void)
2079{
2080        class_unregister(&zram_control_class);
2081        idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2082        zram_debugfs_destroy();
2083        idr_destroy(&zram_index_idr);
2084        unregister_blkdev(zram_major, "zram");
2085        cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2086}
2087
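    /*
     * Module init: set up the CPU hotplug state used for the per-CPU
     * compression streams, register the zram-control class, the debugfs
     * root and the block major, then pre-create num_devices devices.
     */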
2088static int __init zram_init(void)
2089{
2090        int ret;
2091
2092        ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2093                                      zcomp_cpu_up_prepare, zcomp_cpu_dead);
2094        if (ret < 0)
2095                return ret;
2096
2097        ret = class_register(&zram_control_class);
2098        if (ret) {
2099                pr_err("Unable to register zram-control class\n");
2100                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2101                return ret;
2102        }
2103
2104        zram_debugfs_create();
2105        zram_major = register_blkdev(0, "zram");
2106        if (zram_major <= 0) {
2107                pr_err("Unable to get major number\n");
2108                class_unregister(&zram_control_class);
2109                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2110                return -EBUSY;
2111        }
2112
2113        while (num_devices != 0) {
2114                mutex_lock(&zram_index_mutex);
2115                ret = zram_add();
2116                mutex_unlock(&zram_index_mutex);
2117                if (ret < 0)
2118                        goto out_error;
2119                num_devices--;
2120        }
2121
2122        return 0;
2123
2124out_error:
2125        destroy_devices();
2126        return ret;
2127}
2128
2129static void __exit zram_exit(void)
2130{
2131        destroy_devices();
2132}
2133
2134module_init(zram_init);
2135module_exit(zram_exit);
2136
2137module_param(num_devices, uint, 0);
2138MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
2139
2140MODULE_LICENSE("Dual BSD/GPL");
2141MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2142MODULE_DESCRIPTION("Compressed RAM Block Device");
2143