linux/fs/ocfs2/slot_map.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * slot_map.c
   4 *
   5 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   6 */
   7
   8#include <linux/types.h>
   9#include <linux/slab.h>
  10#include <linux/highmem.h>
  11
  12#include <cluster/masklog.h>
  13
  14#include "ocfs2.h"
  15
  16#include "dlmglue.h"
  17#include "extent_map.h"
  18#include "heartbeat.h"
  19#include "inode.h"
  20#include "slot_map.h"
  21#include "super.h"
  22#include "sysfile.h"
  23#include "ocfs2_trace.h"
  24
  25#include "buffer_head_io.h"
  26
  27
  28struct ocfs2_slot {
  29        int sl_valid;
  30        unsigned int sl_node_num;
  31};
  32
  33struct ocfs2_slot_info {
  34        int si_extended;
  35        int si_slots_per_block;
  36        struct inode *si_inode;
  37        unsigned int si_blocks;
  38        struct buffer_head **si_bh;
  39        unsigned int si_num_slots;
  40        struct ocfs2_slot si_slots[];
  41};
  42
  43
  44static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
  45                                    unsigned int node_num);
  46
  47static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
  48                                  int slot_num)
  49{
  50        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  51        si->si_slots[slot_num].sl_valid = 0;
  52}
  53
  54static void ocfs2_set_slot(struct ocfs2_slot_info *si,
  55                           int slot_num, unsigned int node_num)
  56{
  57        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  58
  59        si->si_slots[slot_num].sl_valid = 1;
  60        si->si_slots[slot_num].sl_node_num = node_num;
  61}
  62
  63/* This version is for the extended slot map */
  64static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
  65{
  66        int b, i, slotno;
  67        struct ocfs2_slot_map_extended *se;
  68
  69        slotno = 0;
  70        for (b = 0; b < si->si_blocks; b++) {
  71                se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
  72                for (i = 0;
  73                     (i < si->si_slots_per_block) &&
  74                     (slotno < si->si_num_slots);
  75                     i++, slotno++) {
  76                        if (se->se_slots[i].es_valid)
  77                                ocfs2_set_slot(si, slotno,
  78                                               le32_to_cpu(se->se_slots[i].es_node_num));
  79                        else
  80                                ocfs2_invalidate_slot(si, slotno);
  81                }
  82        }
  83}
  84
  85/*
  86 * Post the slot information on disk into our slot_info struct.
  87 * Must be protected by osb_lock.
  88 */
  89static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
  90{
  91        int i;
  92        struct ocfs2_slot_map *sm;
  93
  94        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
  95
  96        for (i = 0; i < si->si_num_slots; i++) {
  97                if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
  98                        ocfs2_invalidate_slot(si, i);
  99                else
 100                        ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
 101        }
 102}
 103
 104static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 105{
 106        /*
 107         * The slot data will have been refreshed when ocfs2_super_lock
 108         * was taken.
 109         */
 110        if (si->si_extended)
 111                ocfs2_update_slot_info_extended(si);
 112        else
 113                ocfs2_update_slot_info_old(si);
 114}
 115
 116int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 117{
 118        int ret;
 119        struct ocfs2_slot_info *si = osb->slot_info;
 120
 121        if (si == NULL)
 122                return 0;
 123
 124        BUG_ON(si->si_blocks == 0);
 125        BUG_ON(si->si_bh == NULL);
 126
 127        trace_ocfs2_refresh_slot_info(si->si_blocks);
 128
 129        /*
 130         * We pass -1 as blocknr because we expect all of si->si_bh to
 131         * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 132         * this is not true, the read of -1 (UINT64_MAX) will fail.
 133         */
 134        ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
 135                                si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 136        if (ret == 0) {
 137                spin_lock(&osb->osb_lock);
 138                ocfs2_update_slot_info(si);
 139                spin_unlock(&osb->osb_lock);
 140        }
 141
 142        return ret;
 143}
 144
 145/* post the our slot info stuff into it's destination bh and write it
 146 * out. */
 147static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
 148                                            int slot_num,
 149                                            struct buffer_head **bh)
 150{
 151        int blkind = slot_num / si->si_slots_per_block;
 152        int slotno = slot_num % si->si_slots_per_block;
 153        struct ocfs2_slot_map_extended *se;
 154
 155        BUG_ON(blkind >= si->si_blocks);
 156
 157        se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
 158        se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
 159        if (si->si_slots[slot_num].sl_valid)
 160                se->se_slots[slotno].es_node_num =
 161                        cpu_to_le32(si->si_slots[slot_num].sl_node_num);
 162        *bh = si->si_bh[blkind];
 163}
 164
 165static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
 166                                       int slot_num,
 167                                       struct buffer_head **bh)
 168{
 169        int i;
 170        struct ocfs2_slot_map *sm;
 171
 172        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 173        for (i = 0; i < si->si_num_slots; i++) {
 174                if (si->si_slots[i].sl_valid)
 175                        sm->sm_slots[i] =
 176                                cpu_to_le16(si->si_slots[i].sl_node_num);
 177                else
 178                        sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 179        }
 180        *bh = si->si_bh[0];
 181}
 182
 183static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 184                                  struct ocfs2_slot_info *si,
 185                                  int slot_num)
 186{
 187        int status;
 188        struct buffer_head *bh;
 189
 190        spin_lock(&osb->osb_lock);
 191        if (si->si_extended)
 192                ocfs2_update_disk_slot_extended(si, slot_num, &bh);
 193        else
 194                ocfs2_update_disk_slot_old(si, slot_num, &bh);
 195        spin_unlock(&osb->osb_lock);
 196
 197        status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 198        if (status < 0)
 199                mlog_errno(status);
 200
 201        return status;
 202}
 203
 204/*
 205 * Calculate how many bytes are needed by the slot map.  Returns
 206 * an error if the slot map file is too small.
 207 */
 208static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 209                                        struct inode *inode,
 210                                        unsigned long long *bytes)
 211{
 212        unsigned long long bytes_needed;
 213
 214        if (ocfs2_uses_extended_slot_map(osb)) {
 215                bytes_needed = osb->max_slots *
 216                        sizeof(struct ocfs2_extended_slot);
 217        } else {
 218                bytes_needed = osb->max_slots * sizeof(__le16);
 219        }
 220        if (bytes_needed > i_size_read(inode)) {
 221                mlog(ML_ERROR,
 222                     "Slot map file is too small!  (size %llu, needed %llu)\n",
 223                     i_size_read(inode), bytes_needed);
 224                return -ENOSPC;
 225        }
 226
 227        *bytes = bytes_needed;
 228        return 0;
 229}
 230
 231/* try to find global node in the slot info. Returns -ENOENT
 232 * if nothing is found. */
 233static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 234                                    unsigned int node_num)
 235{
 236        int i, ret = -ENOENT;
 237
 238        for(i = 0; i < si->si_num_slots; i++) {
 239                if (si->si_slots[i].sl_valid &&
 240                    (node_num == si->si_slots[i].sl_node_num)) {
 241                        ret = i;
 242                        break;
 243                }
 244        }
 245
 246        return ret;
 247}
 248
 249static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
 250                                   int preferred)
 251{
 252        int i, ret = -ENOSPC;
 253
 254        if ((preferred >= 0) && (preferred < si->si_num_slots)) {
 255                if (!si->si_slots[preferred].sl_valid ||
 256                    !si->si_slots[preferred].sl_node_num) {
 257                        ret = preferred;
 258                        goto out;
 259                }
 260        }
 261
 262        for(i = 0; i < si->si_num_slots; i++) {
 263                if (!si->si_slots[i].sl_valid ||
 264                    !si->si_slots[i].sl_node_num) {
 265                        ret = i;
 266                        break;
 267                }
 268        }
 269out:
 270        return ret;
 271}
 272
 273int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 274{
 275        int slot;
 276        struct ocfs2_slot_info *si = osb->slot_info;
 277
 278        spin_lock(&osb->osb_lock);
 279        slot = __ocfs2_node_num_to_slot(si, node_num);
 280        spin_unlock(&osb->osb_lock);
 281
 282        return slot;
 283}
 284
 285int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 286                                  unsigned int *node_num)
 287{
 288        struct ocfs2_slot_info *si = osb->slot_info;
 289
 290        assert_spin_locked(&osb->osb_lock);
 291
 292        BUG_ON(slot_num < 0);
 293        BUG_ON(slot_num >= osb->max_slots);
 294
 295        if (!si->si_slots[slot_num].sl_valid)
 296                return -ENOENT;
 297
 298        *node_num = si->si_slots[slot_num].sl_node_num;
 299        return 0;
 300}
 301
 302static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 303{
 304        unsigned int i;
 305
 306        if (si == NULL)
 307                return;
 308
 309        iput(si->si_inode);
 310        if (si->si_bh) {
 311                for (i = 0; i < si->si_blocks; i++) {
 312                        if (si->si_bh[i]) {
 313                                brelse(si->si_bh[i]);
 314                                si->si_bh[i] = NULL;
 315                        }
 316                }
 317                kfree(si->si_bh);
 318        }
 319
 320        kfree(si);
 321}
 322
 323int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 324{
 325        struct ocfs2_slot_info *si = osb->slot_info;
 326
 327        if (si == NULL)
 328                return 0;
 329
 330        spin_lock(&osb->osb_lock);
 331        ocfs2_invalidate_slot(si, slot_num);
 332        spin_unlock(&osb->osb_lock);
 333
 334        return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
 335}
 336
 337static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 338                                  struct ocfs2_slot_info *si)
 339{
 340        int status = 0;
 341        u64 blkno;
 342        unsigned long long blocks, bytes = 0;
 343        unsigned int i;
 344        struct buffer_head *bh;
 345
 346        status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
 347        if (status)
 348                goto bail;
 349
 350        blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
 351        BUG_ON(blocks > UINT_MAX);
 352        si->si_blocks = blocks;
 353        if (!si->si_blocks)
 354                goto bail;
 355
 356        if (si->si_extended)
 357                si->si_slots_per_block =
 358                        (osb->sb->s_blocksize /
 359                         sizeof(struct ocfs2_extended_slot));
 360        else
 361                si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
 362
 363        /* The size checks above should ensure this */
 364        BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
 365
 366        trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 367
 368        si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
 369                            GFP_KERNEL);
 370        if (!si->si_bh) {
 371                status = -ENOMEM;
 372                mlog_errno(status);
 373                goto bail;
 374        }
 375
 376        for (i = 0; i < si->si_blocks; i++) {
 377                status = ocfs2_extent_map_get_blocks(si->si_inode, i,
 378                                                     &blkno, NULL, NULL);
 379                if (status < 0) {
 380                        mlog_errno(status);
 381                        goto bail;
 382                }
 383
 384                trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
 385
 386                bh = NULL;  /* Acquire a fresh bh */
 387                status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
 388                                           1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 389                if (status < 0) {
 390                        mlog_errno(status);
 391                        goto bail;
 392                }
 393
 394                si->si_bh[i] = bh;
 395        }
 396
 397bail:
 398        return status;
 399}
 400
 401int ocfs2_init_slot_info(struct ocfs2_super *osb)
 402{
 403        int status;
 404        struct inode *inode = NULL;
 405        struct ocfs2_slot_info *si;
 406
 407        si = kzalloc(struct_size(si, si_slots, osb->max_slots), GFP_KERNEL);
 408        if (!si) {
 409                status = -ENOMEM;
 410                mlog_errno(status);
 411                return status;
 412        }
 413
 414        si->si_extended = ocfs2_uses_extended_slot_map(osb);
 415        si->si_num_slots = osb->max_slots;
 416
 417        inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
 418                                            OCFS2_INVALID_SLOT);
 419        if (!inode) {
 420                status = -EINVAL;
 421                mlog_errno(status);
 422                goto bail;
 423        }
 424
 425        si->si_inode = inode;
 426        status = ocfs2_map_slot_buffers(osb, si);
 427        if (status < 0) {
 428                mlog_errno(status);
 429                goto bail;
 430        }
 431
 432        osb->slot_info = (struct ocfs2_slot_info *)si;
 433bail:
 434        if (status < 0)
 435                __ocfs2_free_slot_info(si);
 436
 437        return status;
 438}
 439
 440void ocfs2_free_slot_info(struct ocfs2_super *osb)
 441{
 442        struct ocfs2_slot_info *si = osb->slot_info;
 443
 444        osb->slot_info = NULL;
 445        __ocfs2_free_slot_info(si);
 446}
 447
 448int ocfs2_find_slot(struct ocfs2_super *osb)
 449{
 450        int status;
 451        int slot;
 452        struct ocfs2_slot_info *si;
 453
 454        si = osb->slot_info;
 455
 456        spin_lock(&osb->osb_lock);
 457        ocfs2_update_slot_info(si);
 458
 459        if (ocfs2_mount_local(osb))
 460                /* use slot 0 directly in local mode */
 461                slot = 0;
 462        else {
 463                /* search for ourselves first and take the slot if it already
 464                 * exists. Perhaps we need to mark this in a variable for our
 465                 * own journal recovery? Possibly not, though we certainly
 466                 * need to warn to the user */
 467                slot = __ocfs2_node_num_to_slot(si, osb->node_num);
 468                if (slot < 0) {
 469                        /* if no slot yet, then just take 1st available
 470                         * one. */
 471                        slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
 472                        if (slot < 0) {
 473                                spin_unlock(&osb->osb_lock);
 474                                mlog(ML_ERROR, "no free slots available!\n");
 475                                status = -EINVAL;
 476                                goto bail;
 477                        }
 478                } else
 479                        printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
 480                               "already allocated to this node!\n",
 481                               slot, osb->dev_str);
 482        }
 483
 484        ocfs2_set_slot(si, slot, osb->node_num);
 485        osb->slot_num = slot;
 486        spin_unlock(&osb->osb_lock);
 487
 488        trace_ocfs2_find_slot(osb->slot_num);
 489
 490        status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 491        if (status < 0) {
 492                mlog_errno(status);
 493                /*
 494                 * if write block failed, invalidate slot to avoid overwrite
 495                 * slot during dismount in case another node rightly has mounted
 496                 */
 497                spin_lock(&osb->osb_lock);
 498                ocfs2_invalidate_slot(si, osb->slot_num);
 499                osb->slot_num = OCFS2_INVALID_SLOT;
 500                spin_unlock(&osb->osb_lock);
 501        }
 502
 503bail:
 504        return status;
 505}
 506
 507void ocfs2_put_slot(struct ocfs2_super *osb)
 508{
 509        int status, slot_num;
 510        struct ocfs2_slot_info *si = osb->slot_info;
 511
 512        if (!si)
 513                return;
 514
 515        spin_lock(&osb->osb_lock);
 516        ocfs2_update_slot_info(si);
 517
 518        slot_num = osb->slot_num;
 519        ocfs2_invalidate_slot(si, osb->slot_num);
 520        osb->slot_num = OCFS2_INVALID_SLOT;
 521        spin_unlock(&osb->osb_lock);
 522
 523        status = ocfs2_update_disk_slot(osb, si, slot_num);
 524        if (status < 0)
 525                mlog_errno(status);
 526
 527        ocfs2_free_slot_info(osb);
 528}
 529