linux/fs/ocfs2/slot_map.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* -*- mode: c; c-basic-offset: 8; -*-
   3 * vim: noexpandtab sw=8 ts=8 sts=0:
   4 *
   5 * slot_map.c
   6 *
   7 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   8 */
   9
  10#include <linux/types.h>
  11#include <linux/slab.h>
  12#include <linux/highmem.h>
  13
  14#include <cluster/masklog.h>
  15
  16#include "ocfs2.h"
  17
  18#include "dlmglue.h"
  19#include "extent_map.h"
  20#include "heartbeat.h"
  21#include "inode.h"
  22#include "slot_map.h"
  23#include "super.h"
  24#include "sysfile.h"
  25#include "ocfs2_trace.h"
  26
  27#include "buffer_head_io.h"
  28
  29
  30struct ocfs2_slot {
  31        int sl_valid;
  32        unsigned int sl_node_num;
  33};
  34
  35struct ocfs2_slot_info {
  36        int si_extended;
  37        int si_slots_per_block;
  38        struct inode *si_inode;
  39        unsigned int si_blocks;
  40        struct buffer_head **si_bh;
  41        unsigned int si_num_slots;
  42        struct ocfs2_slot si_slots[];
  43};
  44
  45
  46static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
  47                                    unsigned int node_num);
  48
  49static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
  50                                  int slot_num)
  51{
  52        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  53        si->si_slots[slot_num].sl_valid = 0;
  54}
  55
  56static void ocfs2_set_slot(struct ocfs2_slot_info *si,
  57                           int slot_num, unsigned int node_num)
  58{
  59        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  60
  61        si->si_slots[slot_num].sl_valid = 1;
  62        si->si_slots[slot_num].sl_node_num = node_num;
  63}
  64
  65/* This version is for the extended slot map */
  66static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
  67{
  68        int b, i, slotno;
  69        struct ocfs2_slot_map_extended *se;
  70
  71        slotno = 0;
  72        for (b = 0; b < si->si_blocks; b++) {
  73                se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
  74                for (i = 0;
  75                     (i < si->si_slots_per_block) &&
  76                     (slotno < si->si_num_slots);
  77                     i++, slotno++) {
  78                        if (se->se_slots[i].es_valid)
  79                                ocfs2_set_slot(si, slotno,
  80                                               le32_to_cpu(se->se_slots[i].es_node_num));
  81                        else
  82                                ocfs2_invalidate_slot(si, slotno);
  83                }
  84        }
  85}
  86
  87/*
  88 * Post the slot information on disk into our slot_info struct.
  89 * Must be protected by osb_lock.
  90 */
  91static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
  92{
  93        int i;
  94        struct ocfs2_slot_map *sm;
  95
  96        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
  97
  98        for (i = 0; i < si->si_num_slots; i++) {
  99                if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
 100                        ocfs2_invalidate_slot(si, i);
 101                else
 102                        ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
 103        }
 104}
 105
 106static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 107{
 108        /*
 109         * The slot data will have been refreshed when ocfs2_super_lock
 110         * was taken.
 111         */
 112        if (si->si_extended)
 113                ocfs2_update_slot_info_extended(si);
 114        else
 115                ocfs2_update_slot_info_old(si);
 116}
 117
 118int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 119{
 120        int ret;
 121        struct ocfs2_slot_info *si = osb->slot_info;
 122
 123        if (si == NULL)
 124                return 0;
 125
 126        BUG_ON(si->si_blocks == 0);
 127        BUG_ON(si->si_bh == NULL);
 128
 129        trace_ocfs2_refresh_slot_info(si->si_blocks);
 130
 131        /*
 132         * We pass -1 as blocknr because we expect all of si->si_bh to
 133         * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 134         * this is not true, the read of -1 (UINT64_MAX) will fail.
 135         */
 136        ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
 137                                si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 138        if (ret == 0) {
 139                spin_lock(&osb->osb_lock);
 140                ocfs2_update_slot_info(si);
 141                spin_unlock(&osb->osb_lock);
 142        }
 143
 144        return ret;
 145}
 146
 147/* post the our slot info stuff into it's destination bh and write it
 148 * out. */
 149static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
 150                                            int slot_num,
 151                                            struct buffer_head **bh)
 152{
 153        int blkind = slot_num / si->si_slots_per_block;
 154        int slotno = slot_num % si->si_slots_per_block;
 155        struct ocfs2_slot_map_extended *se;
 156
 157        BUG_ON(blkind >= si->si_blocks);
 158
 159        se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
 160        se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
 161        if (si->si_slots[slot_num].sl_valid)
 162                se->se_slots[slotno].es_node_num =
 163                        cpu_to_le32(si->si_slots[slot_num].sl_node_num);
 164        *bh = si->si_bh[blkind];
 165}
 166
 167static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
 168                                       int slot_num,
 169                                       struct buffer_head **bh)
 170{
 171        int i;
 172        struct ocfs2_slot_map *sm;
 173
 174        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 175        for (i = 0; i < si->si_num_slots; i++) {
 176                if (si->si_slots[i].sl_valid)
 177                        sm->sm_slots[i] =
 178                                cpu_to_le16(si->si_slots[i].sl_node_num);
 179                else
 180                        sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 181        }
 182        *bh = si->si_bh[0];
 183}
 184
 185static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 186                                  struct ocfs2_slot_info *si,
 187                                  int slot_num)
 188{
 189        int status;
 190        struct buffer_head *bh;
 191
 192        spin_lock(&osb->osb_lock);
 193        if (si->si_extended)
 194                ocfs2_update_disk_slot_extended(si, slot_num, &bh);
 195        else
 196                ocfs2_update_disk_slot_old(si, slot_num, &bh);
 197        spin_unlock(&osb->osb_lock);
 198
 199        status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 200        if (status < 0)
 201                mlog_errno(status);
 202
 203        return status;
 204}
 205
 206/*
 207 * Calculate how many bytes are needed by the slot map.  Returns
 208 * an error if the slot map file is too small.
 209 */
 210static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 211                                        struct inode *inode,
 212                                        unsigned long long *bytes)
 213{
 214        unsigned long long bytes_needed;
 215
 216        if (ocfs2_uses_extended_slot_map(osb)) {
 217                bytes_needed = osb->max_slots *
 218                        sizeof(struct ocfs2_extended_slot);
 219        } else {
 220                bytes_needed = osb->max_slots * sizeof(__le16);
 221        }
 222        if (bytes_needed > i_size_read(inode)) {
 223                mlog(ML_ERROR,
 224                     "Slot map file is too small!  (size %llu, needed %llu)\n",
 225                     i_size_read(inode), bytes_needed);
 226                return -ENOSPC;
 227        }
 228
 229        *bytes = bytes_needed;
 230        return 0;
 231}
 232
 233/* try to find global node in the slot info. Returns -ENOENT
 234 * if nothing is found. */
 235static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 236                                    unsigned int node_num)
 237{
 238        int i, ret = -ENOENT;
 239
 240        for(i = 0; i < si->si_num_slots; i++) {
 241                if (si->si_slots[i].sl_valid &&
 242                    (node_num == si->si_slots[i].sl_node_num)) {
 243                        ret = i;
 244                        break;
 245                }
 246        }
 247
 248        return ret;
 249}
 250
 251static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
 252                                   int preferred)
 253{
 254        int i, ret = -ENOSPC;
 255
 256        if ((preferred >= 0) && (preferred < si->si_num_slots)) {
 257                if (!si->si_slots[preferred].sl_valid) {
 258                        ret = preferred;
 259                        goto out;
 260                }
 261        }
 262
 263        for(i = 0; i < si->si_num_slots; i++) {
 264                if (!si->si_slots[i].sl_valid) {
 265                        ret = i;
 266                        break;
 267                }
 268        }
 269out:
 270        return ret;
 271}
 272
 273int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 274{
 275        int slot;
 276        struct ocfs2_slot_info *si = osb->slot_info;
 277
 278        spin_lock(&osb->osb_lock);
 279        slot = __ocfs2_node_num_to_slot(si, node_num);
 280        spin_unlock(&osb->osb_lock);
 281
 282        return slot;
 283}
 284
 285int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 286                                  unsigned int *node_num)
 287{
 288        struct ocfs2_slot_info *si = osb->slot_info;
 289
 290        assert_spin_locked(&osb->osb_lock);
 291
 292        BUG_ON(slot_num < 0);
 293        BUG_ON(slot_num >= osb->max_slots);
 294
 295        if (!si->si_slots[slot_num].sl_valid)
 296                return -ENOENT;
 297
 298        *node_num = si->si_slots[slot_num].sl_node_num;
 299        return 0;
 300}
 301
 302static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 303{
 304        unsigned int i;
 305
 306        if (si == NULL)
 307                return;
 308
 309        iput(si->si_inode);
 310        if (si->si_bh) {
 311                for (i = 0; i < si->si_blocks; i++) {
 312                        if (si->si_bh[i]) {
 313                                brelse(si->si_bh[i]);
 314                                si->si_bh[i] = NULL;
 315                        }
 316                }
 317                kfree(si->si_bh);
 318        }
 319
 320        kfree(si);
 321}
 322
 323int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 324{
 325        struct ocfs2_slot_info *si = osb->slot_info;
 326
 327        if (si == NULL)
 328                return 0;
 329
 330        spin_lock(&osb->osb_lock);
 331        ocfs2_invalidate_slot(si, slot_num);
 332        spin_unlock(&osb->osb_lock);
 333
 334        return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
 335}
 336
 337static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 338                                  struct ocfs2_slot_info *si)
 339{
 340        int status = 0;
 341        u64 blkno;
 342        unsigned long long blocks, bytes = 0;
 343        unsigned int i;
 344        struct buffer_head *bh;
 345
 346        status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
 347        if (status)
 348                goto bail;
 349
 350        blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
 351        BUG_ON(blocks > UINT_MAX);
 352        si->si_blocks = blocks;
 353        if (!si->si_blocks)
 354                goto bail;
 355
 356        if (si->si_extended)
 357                si->si_slots_per_block =
 358                        (osb->sb->s_blocksize /
 359                         sizeof(struct ocfs2_extended_slot));
 360        else
 361                si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
 362
 363        /* The size checks above should ensure this */
 364        BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
 365
 366        trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 367
 368        si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
 369                            GFP_KERNEL);
 370        if (!si->si_bh) {
 371                status = -ENOMEM;
 372                mlog_errno(status);
 373                goto bail;
 374        }
 375
 376        for (i = 0; i < si->si_blocks; i++) {
 377                status = ocfs2_extent_map_get_blocks(si->si_inode, i,
 378                                                     &blkno, NULL, NULL);
 379                if (status < 0) {
 380                        mlog_errno(status);
 381                        goto bail;
 382                }
 383
 384                trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
 385
 386                bh = NULL;  /* Acquire a fresh bh */
 387                status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
 388                                           1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 389                if (status < 0) {
 390                        mlog_errno(status);
 391                        goto bail;
 392                }
 393
 394                si->si_bh[i] = bh;
 395        }
 396
 397bail:
 398        return status;
 399}
 400
 401int ocfs2_init_slot_info(struct ocfs2_super *osb)
 402{
 403        int status;
 404        struct inode *inode = NULL;
 405        struct ocfs2_slot_info *si;
 406
 407        si = kzalloc(struct_size(si, si_slots, osb->max_slots), GFP_KERNEL);
 408        if (!si) {
 409                status = -ENOMEM;
 410                mlog_errno(status);
 411                return status;
 412        }
 413
 414        si->si_extended = ocfs2_uses_extended_slot_map(osb);
 415        si->si_num_slots = osb->max_slots;
 416
 417        inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
 418                                            OCFS2_INVALID_SLOT);
 419        if (!inode) {
 420                status = -EINVAL;
 421                mlog_errno(status);
 422                goto bail;
 423        }
 424
 425        si->si_inode = inode;
 426        status = ocfs2_map_slot_buffers(osb, si);
 427        if (status < 0) {
 428                mlog_errno(status);
 429                goto bail;
 430        }
 431
 432        osb->slot_info = (struct ocfs2_slot_info *)si;
 433bail:
 434        if (status < 0)
 435                __ocfs2_free_slot_info(si);
 436
 437        return status;
 438}
 439
 440void ocfs2_free_slot_info(struct ocfs2_super *osb)
 441{
 442        struct ocfs2_slot_info *si = osb->slot_info;
 443
 444        osb->slot_info = NULL;
 445        __ocfs2_free_slot_info(si);
 446}
 447
 448int ocfs2_find_slot(struct ocfs2_super *osb)
 449{
 450        int status;
 451        int slot;
 452        struct ocfs2_slot_info *si;
 453
 454        si = osb->slot_info;
 455
 456        spin_lock(&osb->osb_lock);
 457        ocfs2_update_slot_info(si);
 458
 459        /* search for ourselves first and take the slot if it already
 460         * exists. Perhaps we need to mark this in a variable for our
 461         * own journal recovery? Possibly not, though we certainly
 462         * need to warn to the user */
 463        slot = __ocfs2_node_num_to_slot(si, osb->node_num);
 464        if (slot < 0) {
 465                /* if no slot yet, then just take 1st available
 466                 * one. */
 467                slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
 468                if (slot < 0) {
 469                        spin_unlock(&osb->osb_lock);
 470                        mlog(ML_ERROR, "no free slots available!\n");
 471                        status = -EINVAL;
 472                        goto bail;
 473                }
 474        } else
 475                printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
 476                       "allocated to this node!\n", slot, osb->dev_str);
 477
 478        ocfs2_set_slot(si, slot, osb->node_num);
 479        osb->slot_num = slot;
 480        spin_unlock(&osb->osb_lock);
 481
 482        trace_ocfs2_find_slot(osb->slot_num);
 483
 484        status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 485        if (status < 0) {
 486                mlog_errno(status);
 487                /*
 488                 * if write block failed, invalidate slot to avoid overwrite
 489                 * slot during dismount in case another node rightly has mounted
 490                 */
 491                spin_lock(&osb->osb_lock);
 492                ocfs2_invalidate_slot(si, osb->slot_num);
 493                osb->slot_num = OCFS2_INVALID_SLOT;
 494                spin_unlock(&osb->osb_lock);
 495        }
 496
 497bail:
 498        return status;
 499}
 500
 501void ocfs2_put_slot(struct ocfs2_super *osb)
 502{
 503        int status, slot_num;
 504        struct ocfs2_slot_info *si = osb->slot_info;
 505
 506        if (!si)
 507                return;
 508
 509        spin_lock(&osb->osb_lock);
 510        ocfs2_update_slot_info(si);
 511
 512        slot_num = osb->slot_num;
 513        ocfs2_invalidate_slot(si, osb->slot_num);
 514        osb->slot_num = OCFS2_INVALID_SLOT;
 515        spin_unlock(&osb->osb_lock);
 516
 517        status = ocfs2_update_disk_slot(osb, si, slot_num);
 518        if (status < 0)
 519                mlog_errno(status);
 520
 521        ocfs2_free_slot_info(osb);
 522}
 523