/* linux/fs/btrfs/zlib.c */
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */
  22
  23#include <linux/kernel.h>
  24#include <linux/slab.h>
  25#include <linux/zlib.h>
  26#include <linux/zutil.h>
  27#include <linux/vmalloc.h>
  28#include <linux/init.h>
  29#include <linux/err.h>
  30#include <linux/sched.h>
  31#include <linux/pagemap.h>
  32#include <linux/bio.h>
  33#include "compression.h"
  34
/* Plan: call deflate() with avail_in == *sourcelen,
	avail_out = *dstlen - 12 and flush == Z_FINISH.
	If it doesn't manage to finish, call it again with
	avail_in == 0 and avail_out set to the remaining 12
	bytes for it to clean up.
   Q: Is 12 bytes sufficient?
*/
  42#define STREAM_END_SPACE 12
  43
/*
 * one preallocated zlib context.  Idle workspaces are cached on
 * idle_workspace so each compress/decompress call doesn't have to
 * vmalloc fresh zlib state.
 */
struct workspace {
	z_stream inf_strm;	/* inflate (decompression) stream state */
	z_stream def_strm;	/* deflate (compression) stream state */
	char *buf;		/* one page sized bounce buffer for decompression */
	struct list_head list;	/* entry on the idle_workspace list */
};
  50
static LIST_HEAD(idle_workspace);		/* cached, currently unused workspaces */
static DEFINE_SPINLOCK(workspace_lock);		/* protects idle_workspace and num_workspace */
static unsigned long num_workspace;		/* number of entries on idle_workspace */
static atomic_t alloc_workspace = ATOMIC_INIT(0); /* total workspaces in existence */
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);	/* throttled allocators sleep here */
  56
/*
 * this finds an available zlib workspace or allocates a new one
 * NULL or an ERR_PTR is returned if things go bad.
 *
 * At most num_online_cpus() + 1 workspaces can exist at once; callers
 * beyond that sleep until free_workspace() wakes them.
 */
static struct workspace *find_zlib_workspace(void)
{
	struct workspace *workspace;
	int ret;
	int cpus = num_online_cpus();

again:
	/* fast path: reuse a cached idle workspace if one exists */
	spin_lock(&workspace_lock);
	if (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		num_workspace--;
		spin_unlock(&workspace_lock);
		return workspace;

	}
	spin_unlock(&workspace_lock);
	/*
	 * too many workspaces already allocated: queue ourselves on the
	 * wait queue, re-check after queueing (to close the race with a
	 * concurrent free_workspace() wakeup), sleep, then retry the list
	 */
	if (atomic_read(&alloc_workspace) > cpus) {
		DEFINE_WAIT(wait);
		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&alloc_workspace) > cpus)
			schedule();
		finish_wait(&workspace_wait, &wait);
		goto again;
	}
	/* reserve our slot before the (sleepable) allocations below */
	atomic_inc(&alloc_workspace);
	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
	if (!workspace) {
		ret = -ENOMEM;
		goto fail;
	}

	/* scratch areas for the zlib deflate/inflate state machines */
	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
	if (!workspace->def_strm.workspace) {
		ret = -ENOMEM;
		goto fail;
	}
	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
	if (!workspace->inf_strm.workspace) {
		ret = -ENOMEM;
		goto fail_inflate;
	}
	/* one page sized bounce buffer used during decompression */
	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
	if (!workspace->buf) {
		ret = -ENOMEM;
		goto fail_kmalloc;
	}
	return workspace;

fail_kmalloc:
	vfree(workspace->inf_strm.workspace);
fail_inflate:
	vfree(workspace->def_strm.workspace);
fail:
	kfree(workspace);
	/* undo the reservation and let any sleeping caller try again */
	atomic_dec(&alloc_workspace);
	wake_up(&workspace_wait);
	return ERR_PTR(ret);
}
 121
 122/*
 123 * put a workspace struct back on the list or free it if we have enough
 124 * idle ones sitting around
 125 */
 126static int free_workspace(struct workspace *workspace)
 127{
 128        spin_lock(&workspace_lock);
 129        if (num_workspace < num_online_cpus()) {
 130                list_add_tail(&workspace->list, &idle_workspace);
 131                num_workspace++;
 132                spin_unlock(&workspace_lock);
 133                if (waitqueue_active(&workspace_wait))
 134                        wake_up(&workspace_wait);
 135                return 0;
 136        }
 137        spin_unlock(&workspace_lock);
 138        vfree(workspace->def_strm.workspace);
 139        vfree(workspace->inf_strm.workspace);
 140        kfree(workspace->buf);
 141        kfree(workspace);
 142
 143        atomic_dec(&alloc_workspace);
 144        if (waitqueue_active(&workspace_wait))
 145                wake_up(&workspace_wait);
 146        return 0;
 147}
 148
 149/*
 150 * cleanup function for module exit
 151 */
 152static void free_workspaces(void)
 153{
 154        struct workspace *workspace;
 155        while (!list_empty(&idle_workspace)) {
 156                workspace = list_entry(idle_workspace.next, struct workspace,
 157                                       list);
 158                list_del(&workspace->list);
 159                vfree(workspace->def_strm.workspace);
 160                vfree(workspace->inf_strm.workspace);
 161                kfree(workspace->buf);
 162                kfree(workspace);
 163                atomic_dec(&alloc_workspace);
 164        }
 165}
 166
 167/*
 168 * given an address space and start/len, compress the bytes.
 169 *
 170 * pages are allocated to hold the compressed result and stored
 171 * in 'pages'
 172 *
 173 * out_pages is used to return the number of pages allocated.  There
 174 * may be pages allocated even if we return an error
 175 *
 176 * total_in is used to return the number of bytes actually read.  It
 177 * may be smaller then len if we had to exit early because we
 178 * ran out of room in the pages array or because we cross the
 179 * max_out threshold.
 180 *
 181 * total_out is used to return the total number of compressed bytes
 182 *
 183 * max_out tells us the max number of bytes that we're allowed to
 184 * stuff into pages
 185 */
 186int btrfs_zlib_compress_pages(struct address_space *mapping,
 187                              u64 start, unsigned long len,
 188                              struct page **pages,
 189                              unsigned long nr_dest_pages,
 190                              unsigned long *out_pages,
 191                              unsigned long *total_in,
 192                              unsigned long *total_out,
 193                              unsigned long max_out)
 194{
 195        int ret;
 196        struct workspace *workspace;
 197        char *data_in;
 198        char *cpage_out;
 199        int nr_pages = 0;
 200        struct page *in_page = NULL;
 201        struct page *out_page = NULL;
 202        int out_written = 0;
 203        int in_read = 0;
 204        unsigned long bytes_left;
 205
 206        *out_pages = 0;
 207        *total_out = 0;
 208        *total_in = 0;
 209
 210        workspace = find_zlib_workspace();
 211        if (IS_ERR(workspace))
 212                return -1;
 213
 214        if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
 215                printk(KERN_WARNING "deflateInit failed\n");
 216                ret = -1;
 217                goto out;
 218        }
 219
 220        workspace->def_strm.total_in = 0;
 221        workspace->def_strm.total_out = 0;
 222
 223        in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
 224        data_in = kmap(in_page);
 225
 226        out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 227        cpage_out = kmap(out_page);
 228        pages[0] = out_page;
 229        nr_pages = 1;
 230
 231        workspace->def_strm.next_in = data_in;
 232        workspace->def_strm.next_out = cpage_out;
 233        workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
 234        workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
 235
 236        out_written = 0;
 237        in_read = 0;
 238
 239        while (workspace->def_strm.total_in < len) {
 240                ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
 241                if (ret != Z_OK) {
 242                        printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
 243                               ret);
 244                        zlib_deflateEnd(&workspace->def_strm);
 245                        ret = -1;
 246                        goto out;
 247                }
 248
 249                /* we're making it bigger, give up */
 250                if (workspace->def_strm.total_in > 8192 &&
 251                    workspace->def_strm.total_in <
 252                    workspace->def_strm.total_out) {
 253                        ret = -1;
 254                        goto out;
 255                }
 256                /* we need another page for writing out.  Test this
 257                 * before the total_in so we will pull in a new page for
 258                 * the stream end if required
 259                 */
 260                if (workspace->def_strm.avail_out == 0) {
 261                        kunmap(out_page);
 262                        if (nr_pages == nr_dest_pages) {
 263                                out_page = NULL;
 264                                ret = -1;
 265                                goto out;
 266                        }
 267                        out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 268                        cpage_out = kmap(out_page);
 269                        pages[nr_pages] = out_page;
 270                        nr_pages++;
 271                        workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
 272                        workspace->def_strm.next_out = cpage_out;
 273                }
 274                /* we're all done */
 275                if (workspace->def_strm.total_in >= len)
 276                        break;
 277
 278                /* we've read in a full page, get a new one */
 279                if (workspace->def_strm.avail_in == 0) {
 280                        if (workspace->def_strm.total_out > max_out)
 281                                break;
 282
 283                        bytes_left = len - workspace->def_strm.total_in;
 284                        kunmap(in_page);
 285                        page_cache_release(in_page);
 286
 287                        start += PAGE_CACHE_SIZE;
 288                        in_page = find_get_page(mapping,
 289                                                start >> PAGE_CACHE_SHIFT);
 290                        data_in = kmap(in_page);
 291                        workspace->def_strm.avail_in = min(bytes_left,
 292                                                           PAGE_CACHE_SIZE);
 293                        workspace->def_strm.next_in = data_in;
 294                }
 295        }
 296        workspace->def_strm.avail_in = 0;
 297        ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
 298        zlib_deflateEnd(&workspace->def_strm);
 299
 300        if (ret != Z_STREAM_END) {
 301                ret = -1;
 302                goto out;
 303        }
 304
 305        if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
 306                ret = -1;
 307                goto out;
 308        }
 309
 310        ret = 0;
 311        *total_out = workspace->def_strm.total_out;
 312        *total_in = workspace->def_strm.total_in;
 313out:
 314        *out_pages = nr_pages;
 315        if (out_page)
 316                kunmap(out_page);
 317
 318        if (in_page) {
 319                kunmap(in_page);
 320                page_cache_release(in_page);
 321        }
 322        free_workspace(workspace);
 323        return ret;
 324}
 325
/*
 * pages_in is an array of pages with compressed data.
 *
 * disk_start is the starting logical offset of this array in the file
 *
 * bvec is a bio_vec of pages from the file that we want to decompress into
 *
 * vcnt is the count of pages in the biovec
 *
 * srclen is the number of bytes in pages_in
 *
 * The basic idea is that we have a bio that was created by readpages.
 * The pages in the bio are for the uncompressed data, and they may not
 * be contiguous.  They all correspond to the range of bytes covered by
 * the compressed extent.
 *
 * Returns 0 on success, -1 if the stream is corrupt or doesn't end
 * cleanly.
 */
int btrfs_zlib_decompress_biovec(struct page **pages_in,
			      u64 disk_start,
			      struct bio_vec *bvec,
			      int vcnt,
			      size_t srclen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	char *data_in;
	size_t total_out = 0;
	unsigned long page_bytes_left;
	unsigned long page_in_index = 0;
	unsigned long page_out_index = 0;
	struct page *page_out;
	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
					PAGE_CACHE_SIZE;
	unsigned long buf_start;
	unsigned long buf_offset;
	unsigned long bytes;
	unsigned long working_bytes;
	unsigned long pg_offset;
	unsigned long start_byte;
	unsigned long current_buf_start;
	char *kaddr;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	/* feed the first page of compressed input to zlib */
	data_in = kmap(pages_in[page_in_index]);
	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
	workspace->inf_strm.total_in = 0;

	/* decompress into the one-page workspace buffer, then copy out */
	workspace->inf_strm.total_out = 0;
	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	page_out = bvec[page_out_index].bv_page;
	page_bytes_left = PAGE_CACHE_SIZE;
	pg_offset = 0;

	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		/* raw deflate: negative window bits, skip the 2 byte header */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}
	while (workspace->inf_strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;
		/*
		 * buf_start is the byte offset of the start of our
		 * workspace buffer within the decompressed stream
		 */
		buf_start = total_out;

		/* total_out is the last byte of the workspace buffer */
		total_out = workspace->inf_strm.total_out;

		/* bytes produced by this inflate call */
		working_bytes = total_out - buf_start;

		/*
		 * start byte is the first byte of the page we're currently
		 * copying into relative to the start of the compressed data.
		 */
		start_byte = page_offset(page_out) - disk_start;

		if (working_bytes == 0) {
			/* we didn't make progress in this inflate
			 * call, we're done
			 */
			if (ret != Z_STREAM_END)
				ret = -1;
			break;
		}

		/* we haven't yet hit data corresponding to this page */
		if (total_out <= start_byte)
			goto next;

		/*
		 * the start of the data we care about is offset into
		 * the middle of our working buffer
		 */
		if (total_out > start_byte && buf_start < start_byte) {
			buf_offset = start_byte - buf_start;
			working_bytes -= buf_offset;
		} else {
			buf_offset = 0;
		}
		current_buf_start = buf_start;

		/* copy bytes from the working buffer into the pages */
		while (working_bytes > 0) {
			bytes = min(PAGE_CACHE_SIZE - pg_offset,
				    PAGE_CACHE_SIZE - buf_offset);
			bytes = min(bytes, working_bytes);
			kaddr = kmap_atomic(page_out, KM_USER0);
			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
			       bytes);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page_out);

			pg_offset += bytes;
			page_bytes_left -= bytes;
			buf_offset += bytes;
			working_bytes -= bytes;
			current_buf_start += bytes;

			/* check if we need to pick another page */
			if (page_bytes_left == 0) {
				page_out_index++;
				if (page_out_index >= vcnt) {
					/* out of destination pages: done */
					ret = 0;
					goto done;
				}

				page_out = bvec[page_out_index].bv_page;
				pg_offset = 0;
				page_bytes_left = PAGE_CACHE_SIZE;
				start_byte = page_offset(page_out) - disk_start;

				/*
				 * make sure our new page is covered by this
				 * working buffer
				 */
				if (total_out <= start_byte)
					goto next;

				/* the next page in the biovec might not
				 * be adjacent to the last page, but it
				 * might still be found inside this working
				 * buffer.  bump our offset pointer
				 */
				if (total_out > start_byte &&
				    current_buf_start < start_byte) {
					buf_offset = start_byte - buf_start;
					working_bytes = total_out - start_byte;
					current_buf_start = buf_start +
						buf_offset;
				}
			}
		}
next:
		/* reset the output buffer for the next inflate call */
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

		/* input page exhausted: advance to the next compressed page */
		if (workspace->inf_strm.avail_in == 0) {
			unsigned long tmp;
			kunmap(pages_in[page_in_index]);
			page_in_index++;
			if (page_in_index >= total_pages_in) {
				/* no input left; NULL keeps done: from
				 * kunmapping past the end of pages_in */
				data_in = NULL;
				break;
			}
			data_in = kmap(pages_in[page_in_index]);
			workspace->inf_strm.next_in = data_in;
			tmp = srclen - workspace->inf_strm.total_in;
			workspace->inf_strm.avail_in = min(tmp,
							   PAGE_CACHE_SIZE);
		}
	}
	if (ret != Z_STREAM_END)
		ret = -1;
	else
		ret = 0;
done:
	zlib_inflateEnd(&workspace->inf_strm);
	if (data_in)
		kunmap(pages_in[page_in_index]);
out:
	free_workspace(workspace);
	return ret;
}
 528
 529/*
 530 * a less complex decompression routine.  Our compressed data fits in a
 531 * single page, and we want to read a single page out of it.
 532 * start_byte tells us the offset into the compressed data we're interested in
 533 */
 534int btrfs_zlib_decompress(unsigned char *data_in,
 535                          struct page *dest_page,
 536                          unsigned long start_byte,
 537                          size_t srclen, size_t destlen)
 538{
 539        int ret = 0;
 540        int wbits = MAX_WBITS;
 541        struct workspace *workspace;
 542        unsigned long bytes_left = destlen;
 543        unsigned long total_out = 0;
 544        char *kaddr;
 545
 546        if (destlen > PAGE_CACHE_SIZE)
 547                return -ENOMEM;
 548
 549        workspace = find_zlib_workspace();
 550        if (IS_ERR(workspace))
 551                return -ENOMEM;
 552
 553        workspace->inf_strm.next_in = data_in;
 554        workspace->inf_strm.avail_in = srclen;
 555        workspace->inf_strm.total_in = 0;
 556
 557        workspace->inf_strm.next_out = workspace->buf;
 558        workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
 559        workspace->inf_strm.total_out = 0;
 560        /* If it's deflate, and it's got no preset dictionary, then
 561           we can tell zlib to skip the adler32 check. */
 562        if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
 563            ((data_in[0] & 0x0f) == Z_DEFLATED) &&
 564            !(((data_in[0]<<8) + data_in[1]) % 31)) {
 565
 566                wbits = -((data_in[0] >> 4) + 8);
 567                workspace->inf_strm.next_in += 2;
 568                workspace->inf_strm.avail_in -= 2;
 569        }
 570
 571        if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 572                printk(KERN_WARNING "inflateInit failed\n");
 573                ret = -1;
 574                goto out;
 575        }
 576
 577        while (bytes_left > 0) {
 578                unsigned long buf_start;
 579                unsigned long buf_offset;
 580                unsigned long bytes;
 581                unsigned long pg_offset = 0;
 582
 583                ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
 584                if (ret != Z_OK && ret != Z_STREAM_END)
 585                        break;
 586
 587                buf_start = total_out;
 588                total_out = workspace->inf_strm.total_out;
 589
 590                if (total_out == buf_start) {
 591                        ret = -1;
 592                        break;
 593                }
 594
 595                if (total_out <= start_byte)
 596                        goto next;
 597
 598                if (total_out > start_byte && buf_start < start_byte)
 599                        buf_offset = start_byte - buf_start;
 600                else
 601                        buf_offset = 0;
 602
 603                bytes = min(PAGE_CACHE_SIZE - pg_offset,
 604                            PAGE_CACHE_SIZE - buf_offset);
 605                bytes = min(bytes, bytes_left);
 606
 607                kaddr = kmap_atomic(dest_page, KM_USER0);
 608                memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
 609                kunmap_atomic(kaddr, KM_USER0);
 610
 611                pg_offset += bytes;
 612                bytes_left -= bytes;
 613next:
 614                workspace->inf_strm.next_out = workspace->buf;
 615                workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
 616        }
 617
 618        if (ret != Z_STREAM_END && bytes_left != 0)
 619                ret = -1;
 620        else
 621                ret = 0;
 622
 623        zlib_inflateEnd(&workspace->inf_strm);
 624out:
 625        free_workspace(workspace);
 626        return ret;
 627}
 628
 629void btrfs_zlib_exit(void)
 630{
 631    free_workspaces();
 632}
 633