linux/drivers/staging/zram/xvmalloc.c
/*
 * xvmalloc memory allocator
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "xvmalloc.h"
#include "xvmalloc_int.h"

static void stat_inc(u64 *value)
{
        *value = *value + 1;
}

static void stat_dec(u64 *value)
{
        *value = *value - 1;
}

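/*
 * Block flags (BLOCK_FREE, PREV_FREE) are kept in the low bits of
 * block->prev: block offsets are XV_ALIGN aligned, so those bits are
 * otherwise unused (see FLAGS_MASK and PREV_MASK in xvmalloc_int.h).
 */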
static int test_flag(struct block_header *block, enum blockflags flag)
{
        return block->prev & BIT(flag);
}

static void set_flag(struct block_header *block, enum blockflags flag)
{
        block->prev |= BIT(flag);
}

static void clear_flag(struct block_header *block, enum blockflags flag)
{
        block->prev &= ~BIT(flag);
}

/*
 * Given a <page, offset> pair, provide a dereferenceable pointer.
 * This is called from xv_malloc/xv_free path, so it
 * needs to be fast.
 */
static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
{
        unsigned char *base;

        base = kmap_atomic(page, type);
        return base + offset;
}

static void put_ptr_atomic(void *ptr, enum km_type type)
{
        kunmap_atomic(ptr, type);
}

static u32 get_blockprev(struct block_header *block)
{
        return block->prev & PREV_MASK;
}

static void set_blockprev(struct block_header *block, u16 new_offset)
{
        block->prev = new_offset | (block->prev & FLAGS_MASK);
}

static struct block_header *BLOCK_NEXT(struct block_header *block)
{
        return (struct block_header *)
                ((char *)block + block->size + XV_ALIGN);
}

/*
 * Get the index of the free list containing blocks of the largest
 * size that is less than or equal to the given size.
 */
static u32 get_index_for_insert(u32 size)
{
        if (unlikely(size > XV_MAX_ALLOC_SIZE))
                size = XV_MAX_ALLOC_SIZE;
        size &= ~FL_DELTA_MASK;
        return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/*
 * Get index of free list having blocks of size greater than
 * or equal to requested size.
 */
static u32 get_index(u32 size)
{
        if (unlikely(size < XV_MIN_ALLOC_SIZE))
                size = XV_MIN_ALLOC_SIZE;
        size = ALIGN(size, FL_DELTA);
        return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}
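
/*
 * Worked example (assuming the constants in xvmalloc_int.h, where
 * XV_MIN_ALLOC_SIZE is 32 and FL_DELTA is 8): a 100-byte request searches
 * from get_index(100) = (ALIGN(100, 8) - 32) >> 3 = 9, while a 100-byte
 * free block is inserted at get_index_for_insert(100) =
 * ((100 & ~7) - 32) >> 3 = 8.
 */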

/**
 * find_block - find block of at least given size
 * @pool: memory pool to search from
 * @size: size of block required
 * @page: page containing required block
 * @offset: offset within the page where block is located.
 *
 * Searches the two-level bitmap to locate a block of at least
 * the given size. If such a block is found, it provides
 * <page, offset> to identify this block and returns the index
 * in the freelist where the block was found.
 * Otherwise, returns 0 and the <page, offset> params are not touched.
 */
static u32 find_block(struct xv_pool *pool, u32 size,
                        struct page **page, u32 *offset)
{
        ulong flbitmap, slbitmap;
        u32 flindex, slindex, slbitstart;

        /* There are no free blocks in this pool */
        if (!pool->flbitmap)
                return 0;

        /* Get freelist index corresponding to this size */
        slindex = get_index(size);
        slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
        slbitstart = slindex % BITS_PER_LONG;

        /*
         * If freelist is not empty at this index, we found the
         * block - head of this list. This is an approximate best-fit match.
         */
        if (test_bit(slbitstart, &slbitmap)) {
                *page = pool->freelist[slindex].page;
                *offset = pool->freelist[slindex].offset;
                return slindex;
        }

        /*
         * No best-fit found. Search a bit further in the bitmap for a free
         * block. The second level bitmap consists of a series of
         * BITS_PER_LONG-sized chunks. Search further in the chunk where we
         * expected a best-fit, starting from the index location found above.
         */
        slbitstart++;
        slbitmap >>= slbitstart;

        /* Skip this search if we were already at end of this bitmap chunk */
        if ((slbitstart != BITS_PER_LONG) && slbitmap) {
                slindex += __ffs(slbitmap) + 1;
                *page = pool->freelist[slindex].page;
                *offset = pool->freelist[slindex].offset;
                return slindex;
        }

        /* Now do a full two-level bitmap search to find next nearest fit */
        flindex = slindex / BITS_PER_LONG;

        flbitmap = (pool->flbitmap) >> (flindex + 1);
        if (!flbitmap)
                return 0;

        flindex += __ffs(flbitmap) + 1;
        slbitmap = pool->slbitmap[flindex];
        slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
        *page = pool->freelist[slindex].page;
        *offset = pool->freelist[slindex].offset;

        return slindex;
}
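
/*
 * Illustration (hypothetical pool state, BITS_PER_LONG == 64): a non-empty
 * freelist at slindex 70 is tracked by bit 6 of slbitmap[1], which is in
 * turn summarized by bit 1 of flbitmap. find_block() probes the exact
 * slindex first, then the rest of that slbitmap word, and finally scans
 * flbitmap for the next non-empty word.
 */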

/*
 * Insert block at <page, offset> in freelist of given pool.
 * The freelist used depends on the block size.
 */
static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
                        struct block_header *block)
{
        u32 flindex, slindex;
        struct block_header *nextblock;

        slindex = get_index_for_insert(block->size);
        flindex = slindex / BITS_PER_LONG;

        block->link.prev_page = NULL;
        block->link.prev_offset = 0;
        block->link.next_page = pool->freelist[slindex].page;
        block->link.next_offset = pool->freelist[slindex].offset;
        pool->freelist[slindex].page = page;
        pool->freelist[slindex].offset = offset;

        if (block->link.next_page) {
                nextblock = get_ptr_atomic(block->link.next_page,
                                        block->link.next_offset, KM_USER1);
                nextblock->link.prev_page = page;
                nextblock->link.prev_offset = offset;
                put_ptr_atomic(nextblock, KM_USER1);
                /* If there was a next page then the free bits are set. */
                return;
        }

        __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
        __set_bit(flindex, &pool->flbitmap);
}
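
/*
 * Note that freelists are doubly linked lists threaded through the free
 * blocks themselves. Blocks are named by <page, offset> pairs rather than
 * pointers, so updating a neighbour's links requires temporarily mapping
 * that neighbour's page with get_ptr_atomic().
 */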

/*
 * Remove block from freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
                        struct block_header *block, u32 slindex)
{
        u32 flindex = slindex / BITS_PER_LONG;
        struct block_header *tmpblock;

        if (block->link.prev_page) {
                tmpblock = get_ptr_atomic(block->link.prev_page,
                                block->link.prev_offset, KM_USER1);
                tmpblock->link.next_page = block->link.next_page;
                tmpblock->link.next_offset = block->link.next_offset;
                put_ptr_atomic(tmpblock, KM_USER1);
        }

        if (block->link.next_page) {
                tmpblock = get_ptr_atomic(block->link.next_page,
                                block->link.next_offset, KM_USER1);
                tmpblock->link.prev_page = block->link.prev_page;
                tmpblock->link.prev_offset = block->link.prev_offset;
                put_ptr_atomic(tmpblock, KM_USER1);
        }

        /* Is this block at the head of the freelist? */
        if (pool->freelist[slindex].page == page
           && pool->freelist[slindex].offset == offset) {

                pool->freelist[slindex].page = block->link.next_page;
                pool->freelist[slindex].offset = block->link.next_offset;

                if (pool->freelist[slindex].page) {
                        struct block_header *tmpblock;
                        tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
                                        pool->freelist[slindex].offset,
                                        KM_USER1);
                        tmpblock->link.prev_page = NULL;
                        tmpblock->link.prev_offset = 0;
                        put_ptr_atomic(tmpblock, KM_USER1);
                } else {
                        /* This freelist bucket is empty */
                        __clear_bit(slindex % BITS_PER_LONG,
                                    &pool->slbitmap[flindex]);
                        if (!pool->slbitmap[flindex])
                                __clear_bit(flindex, &pool->flbitmap);
                }
        }

        block->link.prev_page = NULL;
        block->link.prev_offset = 0;
        block->link.next_page = NULL;
        block->link.next_offset = 0;
}

/*
 * Allocate a page and add it to freelist of given pool.
 */
static int grow_pool(struct xv_pool *pool, gfp_t flags)
{
        struct page *page;
        struct block_header *block;

        page = alloc_page(flags);
        if (unlikely(!page))
                return -ENOMEM;

        stat_inc(&pool->total_pages);

        spin_lock(&pool->lock);
        block = get_ptr_atomic(page, 0, KM_USER0);

        block->size = PAGE_SIZE - XV_ALIGN;
        set_flag(block, BLOCK_FREE);
        clear_flag(block, PREV_FREE);
        set_blockprev(block, 0);

        insert_block(pool, page, 0, block);

        put_ptr_atomic(block, KM_USER0);
        spin_unlock(&pool->lock);

        return 0;
}
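
/*
 * A page added by grow_pool() starts out as a single free block of
 * PAGE_SIZE - XV_ALIGN bytes; xv_malloc() carves it up on demand.
 */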

/*
 * Create a memory pool. Allocates freelist, bitmaps and other
 * per-pool metadata.
 */
struct xv_pool *xv_create_pool(void)
{
        u32 ovhd_size;
        struct xv_pool *pool;

        ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
        pool = kzalloc(ovhd_size, GFP_KERNEL);
        if (!pool)
                return NULL;

        spin_lock_init(&pool->lock);

        return pool;
}
EXPORT_SYMBOL_GPL(xv_create_pool);

void xv_destroy_pool(struct xv_pool *pool)
{
        kfree(pool);
}
EXPORT_SYMBOL_GPL(xv_destroy_pool);

/**
 * xv_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @page: page that holds the object
 * @offset: location of object within page
 *
 * On success, <page, offset> identifies the allocated block
 * and 0 is returned. On failure, <page, offset> is set to
 * 0 and -ENOMEM is returned.
 *
 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
 */
int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
                u32 *offset, gfp_t flags)
{
        int error;
        u32 index, tmpsize, origsize, tmpoffset;
        struct block_header *block, *tmpblock;

        *page = NULL;
        *offset = 0;
        origsize = size;

        if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
                return -ENOMEM;

        size = ALIGN(size, XV_ALIGN);

        spin_lock(&pool->lock);

        index = find_block(pool, size, page, offset);

        if (!*page) {
                spin_unlock(&pool->lock);
                if (flags & GFP_NOWAIT)
                        return -ENOMEM;
                error = grow_pool(pool, flags);
                if (unlikely(error))
                        return error;

                spin_lock(&pool->lock);
                index = find_block(pool, size, page, offset);
        }

        if (!*page) {
                spin_unlock(&pool->lock);
                return -ENOMEM;
        }

        block = get_ptr_atomic(*page, *offset, KM_USER0);

        remove_block(pool, *page, *offset, block, index);

        /* Split the block if required */
        tmpoffset = *offset + size + XV_ALIGN;
        tmpsize = block->size - size;
        tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
        if (tmpsize) {
                tmpblock->size = tmpsize - XV_ALIGN;
                set_flag(tmpblock, BLOCK_FREE);
                clear_flag(tmpblock, PREV_FREE);

                set_blockprev(tmpblock, *offset);
                if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
                        insert_block(pool, *page, tmpoffset, tmpblock);

                if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
                        tmpblock = BLOCK_NEXT(tmpblock);
                        set_blockprev(tmpblock, tmpoffset);
                }
        } else {
                /* This block is an exact fit */
                if (tmpoffset != PAGE_SIZE)
                        clear_flag(tmpblock, PREV_FREE);
        }

        block->size = origsize;
        clear_flag(block, BLOCK_FREE);

        put_ptr_atomic(block, KM_USER0);
        spin_unlock(&pool->lock);

        *offset += XV_ALIGN;

        return 0;
}
EXPORT_SYMBOL_GPL(xv_malloc);
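
/*
 * Minimal usage sketch (illustrative only, not part of this file): a caller
 * such as zram allocates an object, maps the returned page to reach it, and
 * later frees it with the same <page, offset> pair. 'len' and 'src' below
 * are placeholder names.
 *
 *      struct page *page;
 *      u32 offset;
 *      void *obj;
 *
 *      if (xv_malloc(pool, len, &page, &offset, GFP_NOIO))
 *              return -ENOMEM;
 *      obj = kmap_atomic(page, KM_USER0) + offset;
 *      memcpy(obj, src, len);
 *      kunmap_atomic(obj, KM_USER0);
 *      ...
 *      xv_free(pool, page, offset);
 */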

/*
 * Free block identified with <page, offset>
 */
void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
{
        void *page_start;
        struct block_header *block, *tmpblock;

        offset -= XV_ALIGN;

        spin_lock(&pool->lock);

        page_start = get_ptr_atomic(page, 0, KM_USER0);
        block = (struct block_header *)((char *)page_start + offset);

        /* Catch double free bugs */
        BUG_ON(test_flag(block, BLOCK_FREE));

        block->size = ALIGN(block->size, XV_ALIGN);

        tmpblock = BLOCK_NEXT(block);
        if (offset + block->size + XV_ALIGN == PAGE_SIZE)
                tmpblock = NULL;

        /* Merge next block if it's free */
        if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
                /*
                 * Blocks smaller than XV_MIN_ALLOC_SIZE
                 * are not inserted in any free list.
                 */
                if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
                        remove_block(pool, page,
                                    offset + block->size + XV_ALIGN, tmpblock,
                                    get_index_for_insert(tmpblock->size));
                }
                block->size += tmpblock->size + XV_ALIGN;
        }

        /* Merge previous block if it's free */
        if (test_flag(block, PREV_FREE)) {
                tmpblock = (struct block_header *)((char *)(page_start) +
                                                get_blockprev(block));
                offset = offset - tmpblock->size - XV_ALIGN;

                if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
                        remove_block(pool, page, offset, tmpblock,
                                    get_index_for_insert(tmpblock->size));

                tmpblock->size += block->size + XV_ALIGN;
                block = tmpblock;
        }

        /* No used objects in this page. Free it. */
        if (block->size == PAGE_SIZE - XV_ALIGN) {
                put_ptr_atomic(page_start, KM_USER0);
                spin_unlock(&pool->lock);

                __free_page(page);
                stat_dec(&pool->total_pages);
                return;
        }

        set_flag(block, BLOCK_FREE);
        if (block->size >= XV_MIN_ALLOC_SIZE)
                insert_block(pool, page, offset, block);

        if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
                tmpblock = BLOCK_NEXT(block);
                set_flag(tmpblock, PREV_FREE);
                set_blockprev(tmpblock, offset);
        }

        put_ptr_atomic(page_start, KM_USER0);
        spin_unlock(&pool->lock);
}
EXPORT_SYMBOL_GPL(xv_free);

u32 xv_get_object_size(void *obj)
{
        struct block_header *blk;

        blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
        return blk->size;
}
EXPORT_SYMBOL_GPL(xv_get_object_size);

/*
 * Returns total memory used by allocator (userdata + metadata)
 */
u64 xv_get_total_size_bytes(struct xv_pool *pool)
{
        return pool->total_pages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);