linux/fs/ocfs2/slot_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * slot_map.c
   5 *
   6 *
   7 *
   8 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation; either
  13 * version 2 of the License, or (at your option) any later version.
  14 *
  15 * This program is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU General Public
  21 * License along with this program; if not, write to the
  22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   23 * Boston, MA 02111-1307, USA.
  24 */
  25
  26#include <linux/types.h>
  27#include <linux/slab.h>
  28#include <linux/highmem.h>
  29
  30#include <cluster/masklog.h>
  31
  32#include "ocfs2.h"
  33
  34#include "dlmglue.h"
  35#include "extent_map.h"
  36#include "heartbeat.h"
  37#include "inode.h"
  38#include "slot_map.h"
  39#include "super.h"
  40#include "sysfile.h"
  41#include "ocfs2_trace.h"
  42
  43#include "buffer_head_io.h"
  44
  45
  46struct ocfs2_slot {
  47        int sl_valid;
  48        unsigned int sl_node_num;
  49};
  50
  51struct ocfs2_slot_info {
  52        int si_extended;
  53        int si_slots_per_block;
  54        struct inode *si_inode;
  55        unsigned int si_blocks;
  56        struct buffer_head **si_bh;
  57        unsigned int si_num_slots;
  58        struct ocfs2_slot *si_slots;
  59};
  60
  61
  62static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
  63                                    unsigned int node_num);
  64
  65static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
  66                                  int slot_num)
  67{
  68        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  69        si->si_slots[slot_num].sl_valid = 0;
  70}
  71
  72static void ocfs2_set_slot(struct ocfs2_slot_info *si,
  73                           int slot_num, unsigned int node_num)
  74{
  75        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  76
  77        si->si_slots[slot_num].sl_valid = 1;
  78        si->si_slots[slot_num].sl_node_num = node_num;
  79}
  80
  81/* This version is for the extended slot map */
  82static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
  83{
  84        int b, i, slotno;
  85        struct ocfs2_slot_map_extended *se;
  86
  87        slotno = 0;
  88        for (b = 0; b < si->si_blocks; b++) {
  89                se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
  90                for (i = 0;
  91                     (i < si->si_slots_per_block) &&
  92                     (slotno < si->si_num_slots);
  93                     i++, slotno++) {
  94                        if (se->se_slots[i].es_valid)
  95                                ocfs2_set_slot(si, slotno,
  96                                               le32_to_cpu(se->se_slots[i].es_node_num));
  97                        else
  98                                ocfs2_invalidate_slot(si, slotno);
  99                }
 100        }
 101}
 102
 103/*
 104 * Post the slot information on disk into our slot_info struct.
 105 * Must be protected by osb_lock.
 106 */
 107static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
 108{
 109        int i;
 110        struct ocfs2_slot_map *sm;
 111
 112        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 113
 114        for (i = 0; i < si->si_num_slots; i++) {
 115                if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
 116                        ocfs2_invalidate_slot(si, i);
 117                else
 118                        ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
 119        }
 120}
 121
 122static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 123{
 124        /*
 125         * The slot data will have been refreshed when ocfs2_super_lock
 126         * was taken.
 127         */
 128        if (si->si_extended)
 129                ocfs2_update_slot_info_extended(si);
 130        else
 131                ocfs2_update_slot_info_old(si);
 132}
 133
 134int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 135{
 136        int ret;
 137        struct ocfs2_slot_info *si = osb->slot_info;
 138
 139        if (si == NULL)
 140                return 0;
 141
 142        BUG_ON(si->si_blocks == 0);
 143        BUG_ON(si->si_bh == NULL);
 144
 145        trace_ocfs2_refresh_slot_info(si->si_blocks);
 146
 147        /*
 148         * We pass -1 as blocknr because we expect all of si->si_bh to
 149         * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 150         * this is not true, the read of -1 (UINT64_MAX) will fail.
 151         */
 152        ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
 153                                si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 154        if (ret == 0) {
 155                spin_lock(&osb->osb_lock);
 156                ocfs2_update_slot_info(si);
 157                spin_unlock(&osb->osb_lock);
 158        }
 159
 160        return ret;
 161}
 162
 163/* post the our slot info stuff into it's destination bh and write it
 164 * out. */
 165static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
 166                                            int slot_num,
 167                                            struct buffer_head **bh)
 168{
 169        int blkind = slot_num / si->si_slots_per_block;
 170        int slotno = slot_num % si->si_slots_per_block;
 171        struct ocfs2_slot_map_extended *se;
 172
 173        BUG_ON(blkind >= si->si_blocks);
 174
 175        se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
 176        se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
 177        if (si->si_slots[slot_num].sl_valid)
 178                se->se_slots[slotno].es_node_num =
 179                        cpu_to_le32(si->si_slots[slot_num].sl_node_num);
 180        *bh = si->si_bh[blkind];
 181}
 182
 183static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
 184                                       int slot_num,
 185                                       struct buffer_head **bh)
 186{
 187        int i;
 188        struct ocfs2_slot_map *sm;
 189
 190        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 191        for (i = 0; i < si->si_num_slots; i++) {
 192                if (si->si_slots[i].sl_valid)
 193                        sm->sm_slots[i] =
 194                                cpu_to_le16(si->si_slots[i].sl_node_num);
 195                else
 196                        sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 197        }
 198        *bh = si->si_bh[0];
 199}
 200
 201static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 202                                  struct ocfs2_slot_info *si,
 203                                  int slot_num)
 204{
 205        int status;
 206        struct buffer_head *bh;
 207
 208        spin_lock(&osb->osb_lock);
 209        if (si->si_extended)
 210                ocfs2_update_disk_slot_extended(si, slot_num, &bh);
 211        else
 212                ocfs2_update_disk_slot_old(si, slot_num, &bh);
 213        spin_unlock(&osb->osb_lock);
 214
 215        status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 216        if (status < 0)
 217                mlog_errno(status);
 218
 219        return status;
 220}
 221
 222/*
 223 * Calculate how many bytes are needed by the slot map.  Returns
 224 * an error if the slot map file is too small.
 225 */
 226static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 227                                        struct inode *inode,
 228                                        unsigned long long *bytes)
 229{
 230        unsigned long long bytes_needed;
 231
 232        if (ocfs2_uses_extended_slot_map(osb)) {
 233                bytes_needed = osb->max_slots *
 234                        sizeof(struct ocfs2_extended_slot);
 235        } else {
 236                bytes_needed = osb->max_slots * sizeof(__le16);
 237        }
 238        if (bytes_needed > i_size_read(inode)) {
 239                mlog(ML_ERROR,
 240                     "Slot map file is too small!  (size %llu, needed %llu)\n",
 241                     i_size_read(inode), bytes_needed);
 242                return -ENOSPC;
 243        }
 244
 245        *bytes = bytes_needed;
 246        return 0;
 247}
 248
 249/* try to find global node in the slot info. Returns -ENOENT
 250 * if nothing is found. */
 251static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 252                                    unsigned int node_num)
 253{
 254        int i, ret = -ENOENT;
 255
 256        for(i = 0; i < si->si_num_slots; i++) {
 257                if (si->si_slots[i].sl_valid &&
 258                    (node_num == si->si_slots[i].sl_node_num)) {
 259                        ret = i;
 260                        break;
 261                }
 262        }
 263
 264        return ret;
 265}
 266
 267static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
 268                                   int preferred)
 269{
 270        int i, ret = -ENOSPC;
 271
 272        if ((preferred >= 0) && (preferred < si->si_num_slots)) {
 273                if (!si->si_slots[preferred].sl_valid) {
 274                        ret = preferred;
 275                        goto out;
 276                }
 277        }
 278
 279        for(i = 0; i < si->si_num_slots; i++) {
 280                if (!si->si_slots[i].sl_valid) {
 281                        ret = i;
 282                        break;
 283                }
 284        }
 285out:
 286        return ret;
 287}
 288
 289int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 290{
 291        int slot;
 292        struct ocfs2_slot_info *si = osb->slot_info;
 293
 294        spin_lock(&osb->osb_lock);
 295        slot = __ocfs2_node_num_to_slot(si, node_num);
 296        spin_unlock(&osb->osb_lock);
 297
 298        return slot;
 299}
 300
 301int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 302                                  unsigned int *node_num)
 303{
 304        struct ocfs2_slot_info *si = osb->slot_info;
 305
 306        assert_spin_locked(&osb->osb_lock);
 307
 308        BUG_ON(slot_num < 0);
 309        BUG_ON(slot_num >= osb->max_slots);
 310
 311        if (!si->si_slots[slot_num].sl_valid)
 312                return -ENOENT;
 313
 314        *node_num = si->si_slots[slot_num].sl_node_num;
 315        return 0;
 316}
 317
 318static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 319{
 320        unsigned int i;
 321
 322        if (si == NULL)
 323                return;
 324
 325        iput(si->si_inode);
 326        if (si->si_bh) {
 327                for (i = 0; i < si->si_blocks; i++) {
 328                        if (si->si_bh[i]) {
 329                                brelse(si->si_bh[i]);
 330                                si->si_bh[i] = NULL;
 331                        }
 332                }
 333                kfree(si->si_bh);
 334        }
 335
 336        kfree(si);
 337}
 338
 339int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 340{
 341        struct ocfs2_slot_info *si = osb->slot_info;
 342
 343        if (si == NULL)
 344                return 0;
 345
 346        spin_lock(&osb->osb_lock);
 347        ocfs2_invalidate_slot(si, slot_num);
 348        spin_unlock(&osb->osb_lock);
 349
 350        return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
 351}
 352
 353static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 354                                  struct ocfs2_slot_info *si)
 355{
 356        int status = 0;
 357        u64 blkno;
 358        unsigned long long blocks, bytes = 0;
 359        unsigned int i;
 360        struct buffer_head *bh;
 361
 362        status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
 363        if (status)
 364                goto bail;
 365
 366        blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
 367        BUG_ON(blocks > UINT_MAX);
 368        si->si_blocks = blocks;
 369        if (!si->si_blocks)
 370                goto bail;
 371
 372        if (si->si_extended)
 373                si->si_slots_per_block =
 374                        (osb->sb->s_blocksize /
 375                         sizeof(struct ocfs2_extended_slot));
 376        else
 377                si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
 378
 379        /* The size checks above should ensure this */
 380        BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
 381
 382        trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 383
 384        si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
 385                            GFP_KERNEL);
 386        if (!si->si_bh) {
 387                status = -ENOMEM;
 388                mlog_errno(status);
 389                goto bail;
 390        }
 391
 392        for (i = 0; i < si->si_blocks; i++) {
 393                status = ocfs2_extent_map_get_blocks(si->si_inode, i,
 394                                                     &blkno, NULL, NULL);
 395                if (status < 0) {
 396                        mlog_errno(status);
 397                        goto bail;
 398                }
 399
 400                trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
 401
 402                bh = NULL;  /* Acquire a fresh bh */
 403                status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
 404                                           1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 405                if (status < 0) {
 406                        mlog_errno(status);
 407                        goto bail;
 408                }
 409
 410                si->si_bh[i] = bh;
 411        }
 412
 413bail:
 414        return status;
 415}
 416
 417int ocfs2_init_slot_info(struct ocfs2_super *osb)
 418{
 419        int status;
 420        struct inode *inode = NULL;
 421        struct ocfs2_slot_info *si;
 422
 423        si = kzalloc(sizeof(struct ocfs2_slot_info) +
 424                     (sizeof(struct ocfs2_slot) * osb->max_slots),
 425                     GFP_KERNEL);
 426        if (!si) {
 427                status = -ENOMEM;
 428                mlog_errno(status);
 429                return status;
 430        }
 431
 432        si->si_extended = ocfs2_uses_extended_slot_map(osb);
 433        si->si_num_slots = osb->max_slots;
 434        si->si_slots = (struct ocfs2_slot *)((char *)si +
 435                                             sizeof(struct ocfs2_slot_info));
 436
 437        inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
 438                                            OCFS2_INVALID_SLOT);
 439        if (!inode) {
 440                status = -EINVAL;
 441                mlog_errno(status);
 442                goto bail;
 443        }
 444
 445        si->si_inode = inode;
 446        status = ocfs2_map_slot_buffers(osb, si);
 447        if (status < 0) {
 448                mlog_errno(status);
 449                goto bail;
 450        }
 451
 452        osb->slot_info = (struct ocfs2_slot_info *)si;
 453bail:
 454        if (status < 0)
 455                __ocfs2_free_slot_info(si);
 456
 457        return status;
 458}
 459
 460void ocfs2_free_slot_info(struct ocfs2_super *osb)
 461{
 462        struct ocfs2_slot_info *si = osb->slot_info;
 463
 464        osb->slot_info = NULL;
 465        __ocfs2_free_slot_info(si);
 466}
 467
 468int ocfs2_find_slot(struct ocfs2_super *osb)
 469{
 470        int status;
 471        int slot;
 472        struct ocfs2_slot_info *si;
 473
 474        si = osb->slot_info;
 475
 476        spin_lock(&osb->osb_lock);
 477        ocfs2_update_slot_info(si);
 478
 479        /* search for ourselves first and take the slot if it already
 480         * exists. Perhaps we need to mark this in a variable for our
 481         * own journal recovery? Possibly not, though we certainly
 482         * need to warn to the user */
 483        slot = __ocfs2_node_num_to_slot(si, osb->node_num);
 484        if (slot < 0) {
 485                /* if no slot yet, then just take 1st available
 486                 * one. */
 487                slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
 488                if (slot < 0) {
 489                        spin_unlock(&osb->osb_lock);
 490                        mlog(ML_ERROR, "no free slots available!\n");
 491                        status = -EINVAL;
 492                        goto bail;
 493                }
 494        } else
 495                printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
 496                       "allocated to this node!\n", slot, osb->dev_str);
 497
 498        ocfs2_set_slot(si, slot, osb->node_num);
 499        osb->slot_num = slot;
 500        spin_unlock(&osb->osb_lock);
 501
 502        trace_ocfs2_find_slot(osb->slot_num);
 503
 504        status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 505        if (status < 0) {
 506                mlog_errno(status);
 507                /*
 508                 * if write block failed, invalidate slot to avoid overwrite
 509                 * slot during dismount in case another node rightly has mounted
 510                 */
 511                spin_lock(&osb->osb_lock);
 512                ocfs2_invalidate_slot(si, osb->slot_num);
 513                osb->slot_num = OCFS2_INVALID_SLOT;
 514                spin_unlock(&osb->osb_lock);
 515        }
 516
 517bail:
 518        return status;
 519}
 520
 521void ocfs2_put_slot(struct ocfs2_super *osb)
 522{
 523        int status, slot_num;
 524        struct ocfs2_slot_info *si = osb->slot_info;
 525
 526        if (!si)
 527                return;
 528
 529        spin_lock(&osb->osb_lock);
 530        ocfs2_update_slot_info(si);
 531
 532        slot_num = osb->slot_num;
 533        ocfs2_invalidate_slot(si, osb->slot_num);
 534        osb->slot_num = OCFS2_INVALID_SLOT;
 535        spin_unlock(&osb->osb_lock);
 536
 537        status = ocfs2_update_disk_slot(osb, si, slot_num);
 538        if (status < 0)
 539                mlog_errno(status);
 540
 541        ocfs2_free_slot_info(osb);
 542}
 543