linux/fs/ocfs2/slot_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * slot_map.c
   5 *
   6 *
   7 *
   8 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation; either
  13 * version 2 of the License, or (at your option) any later version.
  14 *
  15 * This program is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU General Public
  21 * License along with this program; if not, write to the
  22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23 * Boston, MA 02111-1307, USA.
  24 */
  25
  26#include <linux/types.h>
  27#include <linux/slab.h>
  28#include <linux/highmem.h>
  29
  30#include <cluster/masklog.h>
  31
  32#include "ocfs2.h"
  33
  34#include "dlmglue.h"
  35#include "extent_map.h"
  36#include "heartbeat.h"
  37#include "inode.h"
  38#include "slot_map.h"
  39#include "super.h"
  40#include "sysfile.h"
  41#include "ocfs2_trace.h"
  42
  43#include "buffer_head_io.h"
  44
  45
  46struct ocfs2_slot {
  47        int sl_valid;
  48        unsigned int sl_node_num;
  49};
  50
  51struct ocfs2_slot_info {
  52        int si_extended;
  53        int si_slots_per_block;
  54        struct inode *si_inode;
  55        unsigned int si_blocks;
  56        struct buffer_head **si_bh;
  57        unsigned int si_num_slots;
  58        struct ocfs2_slot *si_slots;
  59};
  60
  61
/*
 * Forward declaration; the definition appears further down in this file.
 * NOTE(review): every caller visible in this file comes after that
 * definition, so this prototype looks redundant - confirm before removing.
 */
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num);
  64
  65static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
  66                                  int slot_num)
  67{
  68        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  69        si->si_slots[slot_num].sl_valid = 0;
  70}
  71
  72static void ocfs2_set_slot(struct ocfs2_slot_info *si,
  73                           int slot_num, unsigned int node_num)
  74{
  75        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  76
  77        si->si_slots[slot_num].sl_valid = 1;
  78        si->si_slots[slot_num].sl_node_num = node_num;
  79}
  80
  81/* This version is for the extended slot map */
  82static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
  83{
  84        int b, i, slotno;
  85        struct ocfs2_slot_map_extended *se;
  86
  87        slotno = 0;
  88        for (b = 0; b < si->si_blocks; b++) {
  89                se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
  90                for (i = 0;
  91                     (i < si->si_slots_per_block) &&
  92                     (slotno < si->si_num_slots);
  93                     i++, slotno++) {
  94                        if (se->se_slots[i].es_valid)
  95                                ocfs2_set_slot(si, slotno,
  96                                               le32_to_cpu(se->se_slots[i].es_node_num));
  97                        else
  98                                ocfs2_invalidate_slot(si, slotno);
  99                }
 100        }
 101}
 102
 103/*
 104 * Post the slot information on disk into our slot_info struct.
 105 * Must be protected by osb_lock.
 106 */
 107static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
 108{
 109        int i;
 110        struct ocfs2_slot_map *sm;
 111
 112        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 113
 114        for (i = 0; i < si->si_num_slots; i++) {
 115                if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
 116                        ocfs2_invalidate_slot(si, i);
 117                else
 118                        ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
 119        }
 120}
 121
 122static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 123{
 124        /*
 125         * The slot data will have been refreshed when ocfs2_super_lock
 126         * was taken.
 127         */
 128        if (si->si_extended)
 129                ocfs2_update_slot_info_extended(si);
 130        else
 131                ocfs2_update_slot_info_old(si);
 132}
 133
 134int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 135{
 136        int ret;
 137        struct ocfs2_slot_info *si = osb->slot_info;
 138
 139        if (si == NULL)
 140                return 0;
 141
 142        BUG_ON(si->si_blocks == 0);
 143        BUG_ON(si->si_bh == NULL);
 144
 145        trace_ocfs2_refresh_slot_info(si->si_blocks);
 146
 147        /*
 148         * We pass -1 as blocknr because we expect all of si->si_bh to
 149         * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 150         * this is not true, the read of -1 (UINT64_MAX) will fail.
 151         */
 152        ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
 153                                si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 154        if (ret == 0) {
 155                spin_lock(&osb->osb_lock);
 156                ocfs2_update_slot_info(si);
 157                spin_unlock(&osb->osb_lock);
 158        }
 159
 160        return ret;
 161}
 162
 163/* post the our slot info stuff into it's destination bh and write it
 164 * out. */
 165static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
 166                                            int slot_num,
 167                                            struct buffer_head **bh)
 168{
 169        int blkind = slot_num / si->si_slots_per_block;
 170        int slotno = slot_num % si->si_slots_per_block;
 171        struct ocfs2_slot_map_extended *se;
 172
 173        BUG_ON(blkind >= si->si_blocks);
 174
 175        se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
 176        se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
 177        if (si->si_slots[slot_num].sl_valid)
 178                se->se_slots[slotno].es_node_num =
 179                        cpu_to_le32(si->si_slots[slot_num].sl_node_num);
 180        *bh = si->si_bh[blkind];
 181}
 182
 183static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
 184                                       int slot_num,
 185                                       struct buffer_head **bh)
 186{
 187        int i;
 188        struct ocfs2_slot_map *sm;
 189
 190        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 191        for (i = 0; i < si->si_num_slots; i++) {
 192                if (si->si_slots[i].sl_valid)
 193                        sm->sm_slots[i] =
 194                                cpu_to_le16(si->si_slots[i].sl_node_num);
 195                else
 196                        sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 197        }
 198        *bh = si->si_bh[0];
 199}
 200
 201static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 202                                  struct ocfs2_slot_info *si,
 203                                  int slot_num)
 204{
 205        int status;
 206        struct buffer_head *bh;
 207
 208        spin_lock(&osb->osb_lock);
 209        if (si->si_extended)
 210                ocfs2_update_disk_slot_extended(si, slot_num, &bh);
 211        else
 212                ocfs2_update_disk_slot_old(si, slot_num, &bh);
 213        spin_unlock(&osb->osb_lock);
 214
 215        status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 216        if (status < 0)
 217                mlog_errno(status);
 218
 219        return status;
 220}
 221
 222/*
 223 * Calculate how many bytes are needed by the slot map.  Returns
 224 * an error if the slot map file is too small.
 225 */
 226static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 227                                        struct inode *inode,
 228                                        unsigned long long *bytes)
 229{
 230        unsigned long long bytes_needed;
 231
 232        if (ocfs2_uses_extended_slot_map(osb)) {
 233                bytes_needed = osb->max_slots *
 234                        sizeof(struct ocfs2_extended_slot);
 235        } else {
 236                bytes_needed = osb->max_slots * sizeof(__le16);
 237        }
 238        if (bytes_needed > i_size_read(inode)) {
 239                mlog(ML_ERROR,
 240                     "Slot map file is too small!  (size %llu, needed %llu)\n",
 241                     i_size_read(inode), bytes_needed);
 242                return -ENOSPC;
 243        }
 244
 245        *bytes = bytes_needed;
 246        return 0;
 247}
 248
 249/* try to find global node in the slot info. Returns -ENOENT
 250 * if nothing is found. */
 251static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 252                                    unsigned int node_num)
 253{
 254        int i, ret = -ENOENT;
 255
 256        for(i = 0; i < si->si_num_slots; i++) {
 257                if (si->si_slots[i].sl_valid &&
 258                    (node_num == si->si_slots[i].sl_node_num)) {
 259                        ret = i;
 260                        break;
 261                }
 262        }
 263
 264        return ret;
 265}
 266
 267static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
 268                                   int preferred)
 269{
 270        int i, ret = -ENOSPC;
 271
 272        if ((preferred >= 0) && (preferred < si->si_num_slots)) {
 273                if (!si->si_slots[preferred].sl_valid) {
 274                        ret = preferred;
 275                        goto out;
 276                }
 277        }
 278
 279        for(i = 0; i < si->si_num_slots; i++) {
 280                if (!si->si_slots[i].sl_valid) {
 281                        ret = i;
 282                        break;
 283                }
 284        }
 285out:
 286        return ret;
 287}
 288
 289int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 290{
 291        int slot;
 292        struct ocfs2_slot_info *si = osb->slot_info;
 293
 294        spin_lock(&osb->osb_lock);
 295        slot = __ocfs2_node_num_to_slot(si, node_num);
 296        spin_unlock(&osb->osb_lock);
 297
 298        return slot;
 299}
 300
 301int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 302                                  unsigned int *node_num)
 303{
 304        struct ocfs2_slot_info *si = osb->slot_info;
 305
 306        assert_spin_locked(&osb->osb_lock);
 307
 308        BUG_ON(slot_num < 0);
 309        BUG_ON(slot_num > osb->max_slots);
 310
 311        if (!si->si_slots[slot_num].sl_valid)
 312                return -ENOENT;
 313
 314        *node_num = si->si_slots[slot_num].sl_node_num;
 315        return 0;
 316}
 317
 318static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 319{
 320        unsigned int i;
 321
 322        if (si == NULL)
 323                return;
 324
 325        if (si->si_inode)
 326                iput(si->si_inode);
 327        if (si->si_bh) {
 328                for (i = 0; i < si->si_blocks; i++) {
 329                        if (si->si_bh[i]) {
 330                                brelse(si->si_bh[i]);
 331                                si->si_bh[i] = NULL;
 332                        }
 333                }
 334                kfree(si->si_bh);
 335        }
 336
 337        kfree(si);
 338}
 339
 340int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 341{
 342        struct ocfs2_slot_info *si = osb->slot_info;
 343
 344        if (si == NULL)
 345                return 0;
 346
 347        spin_lock(&osb->osb_lock);
 348        ocfs2_invalidate_slot(si, slot_num);
 349        spin_unlock(&osb->osb_lock);
 350
 351        return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
 352}
 353
 354static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 355                                  struct ocfs2_slot_info *si)
 356{
 357        int status = 0;
 358        u64 blkno;
 359        unsigned long long blocks, bytes = 0;
 360        unsigned int i;
 361        struct buffer_head *bh;
 362
 363        status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
 364        if (status)
 365                goto bail;
 366
 367        blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
 368        BUG_ON(blocks > UINT_MAX);
 369        si->si_blocks = blocks;
 370        if (!si->si_blocks)
 371                goto bail;
 372
 373        if (si->si_extended)
 374                si->si_slots_per_block =
 375                        (osb->sb->s_blocksize /
 376                         sizeof(struct ocfs2_extended_slot));
 377        else
 378                si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
 379
 380        /* The size checks above should ensure this */
 381        BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
 382
 383        trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 384
 385        si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
 386                            GFP_KERNEL);
 387        if (!si->si_bh) {
 388                status = -ENOMEM;
 389                mlog_errno(status);
 390                goto bail;
 391        }
 392
 393        for (i = 0; i < si->si_blocks; i++) {
 394                status = ocfs2_extent_map_get_blocks(si->si_inode, i,
 395                                                     &blkno, NULL, NULL);
 396                if (status < 0) {
 397                        mlog_errno(status);
 398                        goto bail;
 399                }
 400
 401                trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
 402
 403                bh = NULL;  /* Acquire a fresh bh */
 404                status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
 405                                           1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 406                if (status < 0) {
 407                        mlog_errno(status);
 408                        goto bail;
 409                }
 410
 411                si->si_bh[i] = bh;
 412        }
 413
 414bail:
 415        return status;
 416}
 417
 418int ocfs2_init_slot_info(struct ocfs2_super *osb)
 419{
 420        int status;
 421        struct inode *inode = NULL;
 422        struct ocfs2_slot_info *si;
 423
 424        si = kzalloc(sizeof(struct ocfs2_slot_info) +
 425                     (sizeof(struct ocfs2_slot) * osb->max_slots),
 426                     GFP_KERNEL);
 427        if (!si) {
 428                status = -ENOMEM;
 429                mlog_errno(status);
 430                goto bail;
 431        }
 432
 433        si->si_extended = ocfs2_uses_extended_slot_map(osb);
 434        si->si_num_slots = osb->max_slots;
 435        si->si_slots = (struct ocfs2_slot *)((char *)si +
 436                                             sizeof(struct ocfs2_slot_info));
 437
 438        inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
 439                                            OCFS2_INVALID_SLOT);
 440        if (!inode) {
 441                status = -EINVAL;
 442                mlog_errno(status);
 443                goto bail;
 444        }
 445
 446        si->si_inode = inode;
 447        status = ocfs2_map_slot_buffers(osb, si);
 448        if (status < 0) {
 449                mlog_errno(status);
 450                goto bail;
 451        }
 452
 453        osb->slot_info = (struct ocfs2_slot_info *)si;
 454bail:
 455        if (status < 0 && si)
 456                __ocfs2_free_slot_info(si);
 457
 458        return status;
 459}
 460
 461void ocfs2_free_slot_info(struct ocfs2_super *osb)
 462{
 463        struct ocfs2_slot_info *si = osb->slot_info;
 464
 465        osb->slot_info = NULL;
 466        __ocfs2_free_slot_info(si);
 467}
 468
 469int ocfs2_find_slot(struct ocfs2_super *osb)
 470{
 471        int status;
 472        int slot;
 473        struct ocfs2_slot_info *si;
 474
 475        si = osb->slot_info;
 476
 477        spin_lock(&osb->osb_lock);
 478        ocfs2_update_slot_info(si);
 479
 480        /* search for ourselves first and take the slot if it already
 481         * exists. Perhaps we need to mark this in a variable for our
 482         * own journal recovery? Possibly not, though we certainly
 483         * need to warn to the user */
 484        slot = __ocfs2_node_num_to_slot(si, osb->node_num);
 485        if (slot < 0) {
 486                /* if no slot yet, then just take 1st available
 487                 * one. */
 488                slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
 489                if (slot < 0) {
 490                        spin_unlock(&osb->osb_lock);
 491                        mlog(ML_ERROR, "no free slots available!\n");
 492                        status = -EINVAL;
 493                        goto bail;
 494                }
 495        } else
 496                printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
 497                       "allocated to this node!\n", slot, osb->dev_str);
 498
 499        ocfs2_set_slot(si, slot, osb->node_num);
 500        osb->slot_num = slot;
 501        spin_unlock(&osb->osb_lock);
 502
 503        trace_ocfs2_find_slot(osb->slot_num);
 504
 505        status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 506        if (status < 0)
 507                mlog_errno(status);
 508
 509bail:
 510        return status;
 511}
 512
 513void ocfs2_put_slot(struct ocfs2_super *osb)
 514{
 515        int status, slot_num;
 516        struct ocfs2_slot_info *si = osb->slot_info;
 517
 518        if (!si)
 519                return;
 520
 521        spin_lock(&osb->osb_lock);
 522        ocfs2_update_slot_info(si);
 523
 524        slot_num = osb->slot_num;
 525        ocfs2_invalidate_slot(si, osb->slot_num);
 526        osb->slot_num = OCFS2_INVALID_SLOT;
 527        spin_unlock(&osb->osb_lock);
 528
 529        status = ocfs2_update_disk_slot(osb, si, slot_num);
 530        if (status < 0) {
 531                mlog_errno(status);
 532                goto bail;
 533        }
 534
 535bail:
 536        ocfs2_free_slot_info(osb);
 537}
 538
 539