linux/fs/ocfs2/localalloc.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * localalloc.c
   5 *
   6 * Node local data allocation
   7 *
   8 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation; either
  13 * version 2 of the License, or (at your option) any later version.
  14 *
  15 * This program is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU General Public
  21 * License along with this program; if not, write to the
  22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
  24 */
  25
  26#include <linux/fs.h>
  27#include <linux/types.h>
  28#include <linux/slab.h>
  29#include <linux/highmem.h>
  30#include <linux/bitops.h>
  31
  32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
  33#include <cluster/masklog.h>
  34
  35#include "ocfs2.h"
  36
  37#include "alloc.h"
  38#include "blockcheck.h"
  39#include "dlmglue.h"
  40#include "inode.h"
  41#include "journal.h"
  42#include "localalloc.h"
  43#include "suballoc.h"
  44#include "super.h"
  45#include "sysfile.h"
  46
  47#include "buffer_head_io.h"
  48
  49#define OCFS2_LOCAL_ALLOC(dinode)       (&((dinode)->id2.i_lab))
  50
  51static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
  52
  53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  54                                             struct ocfs2_dinode *alloc,
  55                                             u32 numbits);
  56
  57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
  58
  59static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  60                                    handle_t *handle,
  61                                    struct ocfs2_dinode *alloc,
  62                                    struct inode *main_bm_inode,
  63                                    struct buffer_head *main_bm_bh);
  64
  65static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  66                                                struct ocfs2_alloc_context **ac,
  67                                                struct inode **bitmap_inode,
  68                                                struct buffer_head **bitmap_bh);
  69
  70static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  71                                        handle_t *handle,
  72                                        struct ocfs2_alloc_context *ac);
  73
  74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  75                                          struct inode *local_alloc_inode);
  76
  77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
  78{
  79        return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
  80                osb->local_alloc_state == OCFS2_LA_ENABLED);
  81}
  82
  83void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
  84                                      unsigned int num_clusters)
  85{
  86        spin_lock(&osb->osb_lock);
  87        if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
  88            osb->local_alloc_state == OCFS2_LA_THROTTLED)
  89                if (num_clusters >= osb->local_alloc_default_bits) {
  90                        cancel_delayed_work(&osb->la_enable_wq);
  91                        osb->local_alloc_state = OCFS2_LA_ENABLED;
  92                }
  93        spin_unlock(&osb->osb_lock);
  94}
  95
  96void ocfs2_la_enable_worker(struct work_struct *work)
  97{
  98        struct ocfs2_super *osb =
  99                container_of(work, struct ocfs2_super,
 100                             la_enable_wq.work);
 101        spin_lock(&osb->osb_lock);
 102        osb->local_alloc_state = OCFS2_LA_ENABLED;
 103        spin_unlock(&osb->osb_lock);
 104}
 105
 106/*
 107 * Tell us whether a given allocation should use the local alloc
 108 * file. Otherwise, it has to go to the main bitmap.
 109 *
 110 * This function does semi-dirty reads of local alloc size and state!
 111 * This is ok however, as the values are re-checked once under mutex.
 112 */
 113int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
 114{
 115        int ret = 0;
 116        int la_bits;
 117
 118        spin_lock(&osb->osb_lock);
 119        la_bits = osb->local_alloc_bits;
 120
 121        if (!ocfs2_la_state_enabled(osb))
 122                goto bail;
 123
 124        /* la_bits should be at least twice the size (in clusters) of
 125         * a new block group. We want to be sure block group
 126         * allocations go through the local alloc, so allow an
 127         * allocation to take up to half the bitmap. */
 128        if (bits > (la_bits / 2))
 129                goto bail;
 130
 131        ret = 1;
 132bail:
 133        mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
 134             osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
 135        spin_unlock(&osb->osb_lock);
 136        return ret;
 137}
 138
 139int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 140{
 141        int status = 0;
 142        struct ocfs2_dinode *alloc = NULL;
 143        struct buffer_head *alloc_bh = NULL;
 144        u32 num_used;
 145        struct inode *inode = NULL;
 146        struct ocfs2_local_alloc *la;
 147
 148        mlog_entry_void();
 149
 150        if (osb->local_alloc_bits == 0)
 151                goto bail;
 152
 153        if (osb->local_alloc_bits >= osb->bitmap_cpg) {
 154                mlog(ML_NOTICE, "Requested local alloc window %d is larger "
 155                     "than max possible %u. Using defaults.\n",
 156                     osb->local_alloc_bits, (osb->bitmap_cpg - 1));
 157                osb->local_alloc_bits =
 158                        ocfs2_megabytes_to_clusters(osb->sb,
 159                                                    OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
 160        }
 161
 162        /* read the alloc off disk */
 163        inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
 164                                            osb->slot_num);
 165        if (!inode) {
 166                status = -EINVAL;
 167                mlog_errno(status);
 168                goto bail;
 169        }
 170
 171        status = ocfs2_read_inode_block_full(inode, &alloc_bh,
 172                                             OCFS2_BH_IGNORE_CACHE);
 173        if (status < 0) {
 174                mlog_errno(status);
 175                goto bail;
 176        }
 177
 178        alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
 179        la = OCFS2_LOCAL_ALLOC(alloc);
 180
 181        if (!(le32_to_cpu(alloc->i_flags) &
 182            (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
 183                mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
 184                     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 185                status = -EINVAL;
 186                goto bail;
 187        }
 188
 189        if ((la->la_size == 0) ||
 190            (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
 191                mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
 192                     le16_to_cpu(la->la_size));
 193                status = -EINVAL;
 194                goto bail;
 195        }
 196
 197        /* do a little verification. */
 198        num_used = ocfs2_local_alloc_count_bits(alloc);
 199
 200        /* hopefully the local alloc has always been recovered before
 201         * we load it. */
 202        if (num_used
 203            || alloc->id1.bitmap1.i_used
 204            || alloc->id1.bitmap1.i_total
 205            || la->la_bm_off)
 206                mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
 207                     "found = %u, set = %u, taken = %u, off = %u\n",
 208                     num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
 209                     le32_to_cpu(alloc->id1.bitmap1.i_total),
 210                     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
 211
 212        osb->local_alloc_bh = alloc_bh;
 213        osb->local_alloc_state = OCFS2_LA_ENABLED;
 214
 215bail:
 216        if (status < 0)
 217                brelse(alloc_bh);
 218        if (inode)
 219                iput(inode);
 220
 221        mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
 222
 223        mlog_exit(status);
 224        return status;
 225}
 226
 227/*
 228 * return any unused bits to the bitmap and write out a clean
 229 * local_alloc.
 230 *
 231 * local_alloc_bh is optional. If not passed, we will simply use the
 232 * one off osb. If you do pass it however, be warned that it *will* be
 233 * returned brelse'd and NULL'd out.*/
 234void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 235{
 236        int status;
 237        handle_t *handle;
 238        struct inode *local_alloc_inode = NULL;
 239        struct buffer_head *bh = NULL;
 240        struct buffer_head *main_bm_bh = NULL;
 241        struct inode *main_bm_inode = NULL;
 242        struct ocfs2_dinode *alloc_copy = NULL;
 243        struct ocfs2_dinode *alloc = NULL;
 244
 245        mlog_entry_void();
 246
 247        cancel_delayed_work(&osb->la_enable_wq);
 248        flush_workqueue(ocfs2_wq);
 249
 250        if (osb->local_alloc_state == OCFS2_LA_UNUSED)
 251                goto out;
 252
 253        local_alloc_inode =
 254                ocfs2_get_system_file_inode(osb,
 255                                            LOCAL_ALLOC_SYSTEM_INODE,
 256                                            osb->slot_num);
 257        if (!local_alloc_inode) {
 258                status = -ENOENT;
 259                mlog_errno(status);
 260                goto out;
 261        }
 262
 263        osb->local_alloc_state = OCFS2_LA_DISABLED;
 264
 265        main_bm_inode = ocfs2_get_system_file_inode(osb,
 266                                                    GLOBAL_BITMAP_SYSTEM_INODE,
 267                                                    OCFS2_INVALID_SLOT);
 268        if (!main_bm_inode) {
 269                status = -EINVAL;
 270                mlog_errno(status);
 271                goto out;
 272        }
 273
 274        mutex_lock(&main_bm_inode->i_mutex);
 275
 276        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
 277        if (status < 0) {
 278                mlog_errno(status);
 279                goto out_mutex;
 280        }
 281
 282        /* WINDOW_MOVE_CREDITS is a bit heavy... */
 283        handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
 284        if (IS_ERR(handle)) {
 285                mlog_errno(PTR_ERR(handle));
 286                handle = NULL;
 287                goto out_unlock;
 288        }
 289
 290        bh = osb->local_alloc_bh;
 291        alloc = (struct ocfs2_dinode *) bh->b_data;
 292
 293        alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
 294        if (!alloc_copy) {
 295                status = -ENOMEM;
 296                goto out_commit;
 297        }
 298        memcpy(alloc_copy, alloc, bh->b_size);
 299
 300        status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
 301                                         bh, OCFS2_JOURNAL_ACCESS_WRITE);
 302        if (status < 0) {
 303                mlog_errno(status);
 304                goto out_commit;
 305        }
 306
 307        ocfs2_clear_local_alloc(alloc);
 308
 309        status = ocfs2_journal_dirty(handle, bh);
 310        if (status < 0) {
 311                mlog_errno(status);
 312                goto out_commit;
 313        }
 314
 315        brelse(bh);
 316        osb->local_alloc_bh = NULL;
 317        osb->local_alloc_state = OCFS2_LA_UNUSED;
 318
 319        status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
 320                                          main_bm_inode, main_bm_bh);
 321        if (status < 0)
 322                mlog_errno(status);
 323
 324out_commit:
 325        ocfs2_commit_trans(osb, handle);
 326
 327out_unlock:
 328        brelse(main_bm_bh);
 329
 330        ocfs2_inode_unlock(main_bm_inode, 1);
 331
 332out_mutex:
 333        mutex_unlock(&main_bm_inode->i_mutex);
 334        iput(main_bm_inode);
 335
 336out:
 337        if (local_alloc_inode)
 338                iput(local_alloc_inode);
 339
 340        if (alloc_copy)
 341                kfree(alloc_copy);
 342
 343        mlog_exit_void();
 344}
 345
 346/*
 347 * We want to free the bitmap bits outside of any recovery context as
 348 * we'll need a cluster lock to do so, but we must clear the local
 349 * alloc before giving up the recovered nodes journal. To solve this,
 350 * we kmalloc a copy of the local alloc before it's change for the
 351 * caller to process with ocfs2_complete_local_alloc_recovery
 352 */
 353int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 354                                     int slot_num,
 355                                     struct ocfs2_dinode **alloc_copy)
 356{
 357        int status = 0;
 358        struct buffer_head *alloc_bh = NULL;
 359        struct inode *inode = NULL;
 360        struct ocfs2_dinode *alloc;
 361
 362        mlog_entry("(slot_num = %d)\n", slot_num);
 363
 364        *alloc_copy = NULL;
 365
 366        inode = ocfs2_get_system_file_inode(osb,
 367                                            LOCAL_ALLOC_SYSTEM_INODE,
 368                                            slot_num);
 369        if (!inode) {
 370                status = -EINVAL;
 371                mlog_errno(status);
 372                goto bail;
 373        }
 374
 375        mutex_lock(&inode->i_mutex);
 376
 377        status = ocfs2_read_inode_block_full(inode, &alloc_bh,
 378                                             OCFS2_BH_IGNORE_CACHE);
 379        if (status < 0) {
 380                mlog_errno(status);
 381                goto bail;
 382        }
 383
 384        *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
 385        if (!(*alloc_copy)) {
 386                status = -ENOMEM;
 387                goto bail;
 388        }
 389        memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
 390
 391        alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
 392        ocfs2_clear_local_alloc(alloc);
 393
 394        ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
 395        status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
 396        if (status < 0)
 397                mlog_errno(status);
 398
 399bail:
 400        if ((status < 0) && (*alloc_copy)) {
 401                kfree(*alloc_copy);
 402                *alloc_copy = NULL;
 403        }
 404
 405        brelse(alloc_bh);
 406
 407        if (inode) {
 408                mutex_unlock(&inode->i_mutex);
 409                iput(inode);
 410        }
 411
 412        mlog_exit(status);
 413        return status;
 414}
 415
 416/*
 417 * Step 2: By now, we've completed the journal recovery, we've stamped
 418 * a clean local alloc on disk and dropped the node out of the
 419 * recovery map. Dlm locks will no longer stall, so lets clear out the
 420 * main bitmap.
 421 */
 422int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 423                                        struct ocfs2_dinode *alloc)
 424{
 425        int status;
 426        handle_t *handle;
 427        struct buffer_head *main_bm_bh = NULL;
 428        struct inode *main_bm_inode;
 429
 430        mlog_entry_void();
 431
 432        main_bm_inode = ocfs2_get_system_file_inode(osb,
 433                                                    GLOBAL_BITMAP_SYSTEM_INODE,
 434                                                    OCFS2_INVALID_SLOT);
 435        if (!main_bm_inode) {
 436                status = -EINVAL;
 437                mlog_errno(status);
 438                goto out;
 439        }
 440
 441        mutex_lock(&main_bm_inode->i_mutex);
 442
 443        status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
 444        if (status < 0) {
 445                mlog_errno(status);
 446                goto out_mutex;
 447        }
 448
 449        handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
 450        if (IS_ERR(handle)) {
 451                status = PTR_ERR(handle);
 452                handle = NULL;
 453                mlog_errno(status);
 454                goto out_unlock;
 455        }
 456
 457        /* we want the bitmap change to be recorded on disk asap */
 458        handle->h_sync = 1;
 459
 460        status = ocfs2_sync_local_to_main(osb, handle, alloc,
 461                                          main_bm_inode, main_bm_bh);
 462        if (status < 0)
 463                mlog_errno(status);
 464
 465        ocfs2_commit_trans(osb, handle);
 466
 467out_unlock:
 468        ocfs2_inode_unlock(main_bm_inode, 1);
 469
 470out_mutex:
 471        mutex_unlock(&main_bm_inode->i_mutex);
 472
 473        brelse(main_bm_bh);
 474
 475        iput(main_bm_inode);
 476
 477out:
 478        if (!status)
 479                ocfs2_init_inode_steal_slot(osb);
 480        mlog_exit(status);
 481        return status;
 482}
 483
 484/* Check to see if the local alloc window is within ac->ac_max_block */
 485static int ocfs2_local_alloc_in_range(struct inode *inode,
 486                                      struct ocfs2_alloc_context *ac,
 487                                      u32 bits_wanted)
 488{
 489        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 490        struct ocfs2_dinode *alloc;
 491        struct ocfs2_local_alloc *la;
 492        int start;
 493        u64 block_off;
 494
 495        if (!ac->ac_max_block)
 496                return 1;
 497
 498        alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 499        la = OCFS2_LOCAL_ALLOC(alloc);
 500
 501        start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
 502        if (start == -1) {
 503                mlog_errno(-ENOSPC);
 504                return 0;
 505        }
 506
 507        /*
 508         * Converting (bm_off + start + bits_wanted) to blocks gives us
 509         * the blkno just past our actual allocation.  This is perfect
 510         * to compare with ac_max_block.
 511         */
 512        block_off = ocfs2_clusters_to_blocks(inode->i_sb,
 513                                             le32_to_cpu(la->la_bm_off) +
 514                                             start + bits_wanted);
 515        mlog(0, "Checking %llu against %llu\n",
 516             (unsigned long long)block_off,
 517             (unsigned long long)ac->ac_max_block);
 518        if (block_off > ac->ac_max_block)
 519                return 0;
 520
 521        return 1;
 522}
 523
 524/*
 525 * make sure we've got at least bits_wanted contiguous bits in the
 526 * local alloc. You lose them when you drop i_mutex.
 527 *
 528 * We will add ourselves to the transaction passed in, but may start
 529 * our own in order to shift windows.
 530 */
 531int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 532                                   u32 bits_wanted,
 533                                   struct ocfs2_alloc_context *ac)
 534{
 535        int status;
 536        struct ocfs2_dinode *alloc;
 537        struct inode *local_alloc_inode;
 538        unsigned int free_bits;
 539
 540        mlog_entry_void();
 541
 542        BUG_ON(!ac);
 543
 544        local_alloc_inode =
 545                ocfs2_get_system_file_inode(osb,
 546                                            LOCAL_ALLOC_SYSTEM_INODE,
 547                                            osb->slot_num);
 548        if (!local_alloc_inode) {
 549                status = -ENOENT;
 550                mlog_errno(status);
 551                goto bail;
 552        }
 553
 554        mutex_lock(&local_alloc_inode->i_mutex);
 555
 556        /*
 557         * We must double check state and allocator bits because
 558         * another process may have changed them while holding i_mutex.
 559         */
 560        spin_lock(&osb->osb_lock);
 561        if (!ocfs2_la_state_enabled(osb) ||
 562            (bits_wanted > osb->local_alloc_bits)) {
 563                spin_unlock(&osb->osb_lock);
 564                status = -ENOSPC;
 565                goto bail;
 566        }
 567        spin_unlock(&osb->osb_lock);
 568
 569        alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 570
 571#ifdef CONFIG_OCFS2_DEBUG_FS
 572        if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
 573            ocfs2_local_alloc_count_bits(alloc)) {
 574                ocfs2_error(osb->sb, "local alloc inode %llu says it has "
 575                            "%u free bits, but a count shows %u",
 576                            (unsigned long long)le64_to_cpu(alloc->i_blkno),
 577                            le32_to_cpu(alloc->id1.bitmap1.i_used),
 578                            ocfs2_local_alloc_count_bits(alloc));
 579                status = -EIO;
 580                goto bail;
 581        }
 582#endif
 583
 584        free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
 585                le32_to_cpu(alloc->id1.bitmap1.i_used);
 586        if (bits_wanted > free_bits) {
 587                /* uhoh, window change time. */
 588                status =
 589                        ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
 590                if (status < 0) {
 591                        if (status != -ENOSPC)
 592                                mlog_errno(status);
 593                        goto bail;
 594                }
 595
 596                /*
 597                 * Under certain conditions, the window slide code
 598                 * might have reduced the number of bits available or
 599                 * disabled the the local alloc entirely. Re-check
 600                 * here and return -ENOSPC if necessary.
 601                 */
 602                status = -ENOSPC;
 603                if (!ocfs2_la_state_enabled(osb))
 604                        goto bail;
 605
 606                free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
 607                        le32_to_cpu(alloc->id1.bitmap1.i_used);
 608                if (bits_wanted > free_bits)
 609                        goto bail;
 610        }
 611
 612        if (ac->ac_max_block)
 613                mlog(0, "Calling in_range for max block %llu\n",
 614                     (unsigned long long)ac->ac_max_block);
 615
 616        if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
 617                                        bits_wanted)) {
 618                /*
 619                 * The window is outside ac->ac_max_block.
 620                 * This errno tells the caller to keep localalloc enabled
 621                 * but to get the allocation from the main bitmap.
 622                 */
 623                status = -EFBIG;
 624                goto bail;
 625        }
 626
 627        ac->ac_inode = local_alloc_inode;
 628        /* We should never use localalloc from another slot */
 629        ac->ac_alloc_slot = osb->slot_num;
 630        ac->ac_which = OCFS2_AC_USE_LOCAL;
 631        get_bh(osb->local_alloc_bh);
 632        ac->ac_bh = osb->local_alloc_bh;
 633        status = 0;
 634bail:
 635        if (status < 0 && local_alloc_inode) {
 636                mutex_unlock(&local_alloc_inode->i_mutex);
 637                iput(local_alloc_inode);
 638        }
 639
 640        mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
 641             status);
 642
 643        mlog_exit(status);
 644        return status;
 645}
 646
 647int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 648                                 handle_t *handle,
 649                                 struct ocfs2_alloc_context *ac,
 650                                 u32 bits_wanted,
 651                                 u32 *bit_off,
 652                                 u32 *num_bits)
 653{
 654        int status, start;
 655        struct inode *local_alloc_inode;
 656        void *bitmap;
 657        struct ocfs2_dinode *alloc;
 658        struct ocfs2_local_alloc *la;
 659
 660        mlog_entry_void();
 661        BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
 662
 663        local_alloc_inode = ac->ac_inode;
 664        alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 665        la = OCFS2_LOCAL_ALLOC(alloc);
 666
 667        start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
 668        if (start == -1) {
 669                /* TODO: Shouldn't we just BUG here? */
 670                status = -ENOSPC;
 671                mlog_errno(status);
 672                goto bail;
 673        }
 674
 675        bitmap = la->la_bitmap;
 676        *bit_off = le32_to_cpu(la->la_bm_off) + start;
 677        /* local alloc is always contiguous by nature -- we never
 678         * delete bits from it! */
 679        *num_bits = bits_wanted;
 680
 681        status = ocfs2_journal_access_di(handle,
 682                                         INODE_CACHE(local_alloc_inode),
 683                                         osb->local_alloc_bh,
 684                                         OCFS2_JOURNAL_ACCESS_WRITE);
 685        if (status < 0) {
 686                mlog_errno(status);
 687                goto bail;
 688        }
 689
 690        while(bits_wanted--)
 691                ocfs2_set_bit(start++, bitmap);
 692
 693        le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
 694
 695        status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
 696        if (status < 0) {
 697                mlog_errno(status);
 698                goto bail;
 699        }
 700
 701        status = 0;
 702bail:
 703        mlog_exit(status);
 704        return status;
 705}
 706
 707static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
 708{
 709        int i;
 710        u8 *buffer;
 711        u32 count = 0;
 712        struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
 713
 714        mlog_entry_void();
 715
 716        buffer = la->la_bitmap;
 717        for (i = 0; i < le16_to_cpu(la->la_size); i++)
 718                count += hweight8(buffer[i]);
 719
 720        mlog_exit(count);
 721        return count;
 722}
 723
 724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 725                                             struct ocfs2_dinode *alloc,
 726                                             u32 numbits)
 727{
 728        int numfound, bitoff, left, startoff, lastzero;
 729        void *bitmap = NULL;
 730
 731        mlog_entry("(numbits wanted = %u)\n", numbits);
 732
 733        if (!alloc->id1.bitmap1.i_total) {
 734                mlog(0, "No bits in my window!\n");
 735                bitoff = -1;
 736                goto bail;
 737        }
 738
 739        bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
 740
 741        numfound = bitoff = startoff = 0;
 742        lastzero = -1;
 743        left = le32_to_cpu(alloc->id1.bitmap1.i_total);
 744        while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
 745                if (bitoff == left) {
 746                        /* mlog(0, "bitoff (%d) == left", bitoff); */
 747                        break;
 748                }
 749                /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
 750                   "numfound = %d\n", bitoff, startoff, numfound);*/
 751
 752                /* Ok, we found a zero bit... is it contig. or do we
 753                 * start over?*/
 754                if (bitoff == startoff) {
 755                        /* we found a zero */
 756                        numfound++;
 757                        startoff++;
 758                } else {
 759                        /* got a zero after some ones */
 760                        numfound = 1;
 761                        startoff = bitoff+1;
 762                }
 763                /* we got everything we needed */
 764                if (numfound == numbits) {
 765                        /* mlog(0, "Found it all!\n"); */
 766                        break;
 767                }
 768        }
 769
 770        mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
 771             numfound);
 772
 773        if (numfound == numbits)
 774                bitoff = startoff - numfound;
 775        else
 776                bitoff = -1;
 777
 778bail:
 779        mlog_exit(bitoff);
 780        return bitoff;
 781}
 782
 783static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
 784{
 785        struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
 786        int i;
 787        mlog_entry_void();
 788
 789        alloc->id1.bitmap1.i_total = 0;
 790        alloc->id1.bitmap1.i_used = 0;
 791        la->la_bm_off = 0;
 792        for(i = 0; i < le16_to_cpu(la->la_size); i++)
 793                la->la_bitmap[i] = 0;
 794
 795        mlog_exit_void();
 796}
 797
 798#if 0
 799/* turn this on and uncomment below to aid debugging window shifts. */
 800static void ocfs2_verify_zero_bits(unsigned long *bitmap,
 801                                   unsigned int start,
 802                                   unsigned int count)
 803{
 804        unsigned int tmp = count;
 805        while(tmp--) {
 806                if (ocfs2_test_bit(start + tmp, bitmap)) {
 807                        printk("ocfs2_verify_zero_bits: start = %u, count = "
 808                               "%u\n", start, count);
 809                        printk("ocfs2_verify_zero_bits: bit %u is set!",
 810                               start + tmp);
 811                        BUG();
 812                }
 813        }
 814}
 815#endif
 816
 817/*
 818 * sync the local alloc to main bitmap.
 819 *
 820 * assumes you've already locked the main bitmap -- the bitmap inode
 821 * passed is used for caching.
 822 */
 823static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
 824                                    handle_t *handle,
 825                                    struct ocfs2_dinode *alloc,
 826                                    struct inode *main_bm_inode,
 827                                    struct buffer_head *main_bm_bh)
 828{
 829        int status = 0;
 830        int bit_off, left, count, start;
 831        u64 la_start_blk;
 832        u64 blkno;
 833        void *bitmap;
 834        struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
 835
 836        mlog_entry("total = %u, used = %u\n",
 837                   le32_to_cpu(alloc->id1.bitmap1.i_total),
 838                   le32_to_cpu(alloc->id1.bitmap1.i_used));
 839
 840        if (!alloc->id1.bitmap1.i_total) {
 841                mlog(0, "nothing to sync!\n");
 842                goto bail;
 843        }
 844
 845        if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
 846            le32_to_cpu(alloc->id1.bitmap1.i_total)) {
 847                mlog(0, "all bits were taken!\n");
 848                goto bail;
 849        }
 850
 851        la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
 852                                                le32_to_cpu(la->la_bm_off));
 853        bitmap = la->la_bitmap;
 854        start = count = bit_off = 0;
 855        left = le32_to_cpu(alloc->id1.bitmap1.i_total);
 856
 857        while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
 858               != -1) {
 859                if ((bit_off < left) && (bit_off == start)) {
 860                        count++;
 861                        start++;
 862                        continue;
 863                }
 864                if (count) {
 865                        blkno = la_start_blk +
 866                                ocfs2_clusters_to_blocks(osb->sb,
 867                                                         start - count);
 868
 869                        mlog(0, "freeing %u bits starting at local alloc bit "
 870                             "%u (la_start_blk = %llu, blkno = %llu)\n",
 871                             count, start - count,
 872                             (unsigned long long)la_start_blk,
 873                             (unsigned long long)blkno);
 874
 875                        status = ocfs2_free_clusters(handle, main_bm_inode,
 876                                                     main_bm_bh, blkno, count);
 877                        if (status < 0) {
 878                                mlog_errno(status);
 879                                goto bail;
 880                        }
 881                }
 882                if (bit_off >= left)
 883                        break;
 884                count = 1;
 885                start = bit_off + 1;
 886        }
 887
 888bail:
 889        mlog_exit(status);
 890        return status;
 891}
 892
 893enum ocfs2_la_event {
 894        OCFS2_LA_EVENT_SLIDE,           /* Normal window slide. */
 895        OCFS2_LA_EVENT_FRAGMENTED,      /* The global bitmap has
 896                                         * enough bits theoretically
 897                                         * free, but a contiguous
 898                                         * allocation could not be
 899                                         * found. */
 900        OCFS2_LA_EVENT_ENOSPC,          /* Global bitmap doesn't have
 901                                         * enough bits free to satisfy
 902                                         * our request. */
 903};
 904#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
 905/*
 906 * Given an event, calculate the size of our next local alloc window.
 907 *
 908 * This should always be called under i_mutex of the local alloc inode
 909 * so that local alloc disabling doesn't race with processes trying to
 910 * use the allocator.
 911 *
 912 * Returns the state which the local alloc was left in. This value can
 913 * be ignored by some paths.
 914 */
 915static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
 916                                  enum ocfs2_la_event event)
 917{
 918        unsigned int bits;
 919        int state;
 920
 921        spin_lock(&osb->osb_lock);
 922        if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
 923                WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
 924                goto out_unlock;
 925        }
 926
 927        /*
 928         * ENOSPC and fragmentation are treated similarly for now.
 929         */
 930        if (event == OCFS2_LA_EVENT_ENOSPC ||
 931            event == OCFS2_LA_EVENT_FRAGMENTED) {
 932                /*
 933                 * We ran out of contiguous space in the primary
 934                 * bitmap. Drastically reduce the number of bits used
 935                 * by local alloc until we have to disable it.
 936                 */
 937                bits = osb->local_alloc_bits >> 1;
 938                if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
 939                        /*
 940                         * By setting state to THROTTLED, we'll keep
 941                         * the number of local alloc bits used down
 942                         * until an event occurs which would give us
 943                         * reason to assume the bitmap situation might
 944                         * have changed.
 945                         */
 946                        osb->local_alloc_state = OCFS2_LA_THROTTLED;
 947                        osb->local_alloc_bits = bits;
 948                } else {
 949                        osb->local_alloc_state = OCFS2_LA_DISABLED;
 950                }
 951                queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
 952                                   OCFS2_LA_ENABLE_INTERVAL);
 953                goto out_unlock;
 954        }
 955
 956        /*
 957         * Don't increase the size of the local alloc window until we
 958         * know we might be able to fulfill the request. Otherwise, we
 959         * risk bouncing around the global bitmap during periods of
 960         * low space.
 961         */
 962        if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
 963                osb->local_alloc_bits = osb->local_alloc_default_bits;
 964
 965out_unlock:
 966        state = osb->local_alloc_state;
 967        spin_unlock(&osb->osb_lock);
 968
 969        return state;
 970}
 971
 972static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 973                                                struct ocfs2_alloc_context **ac,
 974                                                struct inode **bitmap_inode,
 975                                                struct buffer_head **bitmap_bh)
 976{
 977        int status;
 978
 979        *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 980        if (!(*ac)) {
 981                status = -ENOMEM;
 982                mlog_errno(status);
 983                goto bail;
 984        }
 985
 986retry_enospc:
 987        (*ac)->ac_bits_wanted = osb->local_alloc_bits;
 988
 989        status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
 990        if (status == -ENOSPC) {
 991                if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
 992                    OCFS2_LA_DISABLED)
 993                        goto bail;
 994
 995                ocfs2_free_ac_resource(*ac);
 996                memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
 997                goto retry_enospc;
 998        }
 999        if (status < 0) {
1000                mlog_errno(status);
1001                goto bail;
1002        }
1003
1004        *bitmap_inode = (*ac)->ac_inode;
1005        igrab(*bitmap_inode);
1006        *bitmap_bh = (*ac)->ac_bh;
1007        get_bh(*bitmap_bh);
1008        status = 0;
1009bail:
1010        if ((status < 0) && *ac) {
1011                ocfs2_free_alloc_context(*ac);
1012                *ac = NULL;
1013        }
1014
1015        mlog_exit(status);
1016        return status;
1017}
1018
1019/*
1020 * pass it the bitmap lock in lock_bh if you have it.
1021 */
1022static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1023                                        handle_t *handle,
1024                                        struct ocfs2_alloc_context *ac)
1025{
1026        int status = 0;
1027        u32 cluster_off, cluster_count;
1028        struct ocfs2_dinode *alloc = NULL;
1029        struct ocfs2_local_alloc *la;
1030
1031        mlog_entry_void();
1032
1033        alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1034        la = OCFS2_LOCAL_ALLOC(alloc);
1035
1036        if (alloc->id1.bitmap1.i_total)
1037                mlog(0, "asking me to alloc a new window over a non-empty "
1038                     "one\n");
1039
1040        mlog(0, "Allocating %u clusters for a new window.\n",
1041             osb->local_alloc_bits);
1042
1043        /* Instruct the allocation code to try the most recently used
1044         * cluster group. We'll re-record the group used this pass
1045         * below. */
1046        ac->ac_last_group = osb->la_last_gd;
1047
1048        /* we used the generic suballoc reserve function, but we set
1049         * everything up nicely, so there's no reason why we can't use
1050         * the more specific cluster api to claim bits. */
1051        status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
1052                                      &cluster_off, &cluster_count);
1053        if (status == -ENOSPC) {
1054retry_enospc:
1055                /*
1056                 * Note: We could also try syncing the journal here to
1057                 * allow use of any free bits which the current
1058                 * transaction can't give us access to. --Mark
1059                 */
1060                if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
1061                    OCFS2_LA_DISABLED)
1062                        goto bail;
1063
1064                status = ocfs2_claim_clusters(osb, handle, ac,
1065                                              osb->local_alloc_bits,
1066                                              &cluster_off,
1067                                              &cluster_count);
1068                if (status == -ENOSPC)
1069                        goto retry_enospc;
1070                /*
1071                 * We only shrunk the *minimum* number of in our
1072                 * request - it's entirely possible that the allocator
1073                 * might give us more than we asked for.
1074                 */
1075                if (status == 0) {
1076                        spin_lock(&osb->osb_lock);
1077                        osb->local_alloc_bits = cluster_count;
1078                        spin_unlock(&osb->osb_lock);
1079                }
1080        }
1081        if (status < 0) {
1082                if (status != -ENOSPC)
1083                        mlog_errno(status);
1084                goto bail;
1085        }
1086
1087        osb->la_last_gd = ac->ac_last_group;
1088
1089        la->la_bm_off = cpu_to_le32(cluster_off);
1090        alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
1091        /* just in case... In the future when we find space ourselves,
1092         * we don't have to get all contiguous -- but we'll have to
1093         * set all previously used bits in bitmap and update
1094         * la_bits_set before setting the bits in the main bitmap. */
1095        alloc->id1.bitmap1.i_used = 0;
1096        memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1097               le16_to_cpu(la->la_size));
1098
1099        mlog(0, "New window allocated:\n");
1100        mlog(0, "window la_bm_off = %u\n",
1101             OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
1102        mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
1103
1104bail:
1105        mlog_exit(status);
1106        return status;
1107}
1108
1109/* Note that we do *NOT* lock the local alloc inode here as
1110 * it's been locked already for us. */
1111static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1112                                          struct inode *local_alloc_inode)
1113{
1114        int status = 0;
1115        struct buffer_head *main_bm_bh = NULL;
1116        struct inode *main_bm_inode = NULL;
1117        handle_t *handle = NULL;
1118        struct ocfs2_dinode *alloc;
1119        struct ocfs2_dinode *alloc_copy = NULL;
1120        struct ocfs2_alloc_context *ac = NULL;
1121
1122        mlog_entry_void();
1123
1124        ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1125
1126        /* This will lock the main bitmap for us. */
1127        status = ocfs2_local_alloc_reserve_for_window(osb,
1128                                                      &ac,
1129                                                      &main_bm_inode,
1130                                                      &main_bm_bh);
1131        if (status < 0) {
1132                if (status != -ENOSPC)
1133                        mlog_errno(status);
1134                goto bail;
1135        }
1136
1137        handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
1138        if (IS_ERR(handle)) {
1139                status = PTR_ERR(handle);
1140                handle = NULL;
1141                mlog_errno(status);
1142                goto bail;
1143        }
1144
1145        alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1146
1147        /* We want to clear the local alloc before doing anything
1148         * else, so that if we error later during this operation,
1149         * local alloc shutdown won't try to double free main bitmap
1150         * bits. Make a copy so the sync function knows which bits to
1151         * free. */
1152        alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
1153        if (!alloc_copy) {
1154                status = -ENOMEM;
1155                mlog_errno(status);
1156                goto bail;
1157        }
1158        memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
1159
1160        status = ocfs2_journal_access_di(handle,
1161                                         INODE_CACHE(local_alloc_inode),
1162                                         osb->local_alloc_bh,
1163                                         OCFS2_JOURNAL_ACCESS_WRITE);
1164        if (status < 0) {
1165                mlog_errno(status);
1166                goto bail;
1167        }
1168
1169        ocfs2_clear_local_alloc(alloc);
1170
1171        status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1172        if (status < 0) {
1173                mlog_errno(status);
1174                goto bail;
1175        }
1176
1177        status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1178                                          main_bm_inode, main_bm_bh);
1179        if (status < 0) {
1180                mlog_errno(status);
1181                goto bail;
1182        }
1183
1184        status = ocfs2_local_alloc_new_window(osb, handle, ac);
1185        if (status < 0) {
1186                if (status != -ENOSPC)
1187                        mlog_errno(status);
1188                goto bail;
1189        }
1190
1191        atomic_inc(&osb->alloc_stats.moves);
1192
1193        status = 0;
1194bail:
1195        if (handle)
1196                ocfs2_commit_trans(osb, handle);
1197
1198        brelse(main_bm_bh);
1199
1200        if (main_bm_inode)
1201                iput(main_bm_inode);
1202
1203        if (alloc_copy)
1204                kfree(alloc_copy);
1205
1206        if (ac)
1207                ocfs2_free_alloc_context(ac);
1208
1209        mlog_exit(status);
1210        return status;
1211}
1212
1213