linux/fs/ocfs2/heartbeat.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * heartbeat.c
   5 *
   6 * Register ourselves with the heartbeat service, keep our node maps
   7 * up to date, and fire off recovery when needed.
   8 *
   9 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
  10 *
  11 * This program is free software; you can redistribute it and/or
  12 * modify it under the terms of the GNU General Public
  13 * License as published by the Free Software Foundation; either
  14 * version 2 of the License, or (at your option) any later version.
  15 *
  16 * This program is distributed in the hope that it will be useful,
  17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19 * General Public License for more details.
  20 *
  21 * You should have received a copy of the GNU General Public
  22 * License along with this program; if not, write to the
  23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 * Boston, MA 02111-1307, USA.
  25 */
  26
  27#include <linux/fs.h>
  28#include <linux/types.h>
  29#include <linux/slab.h>
  30#include <linux/highmem.h>
  31#include <linux/kmod.h>
  32
  33#include <cluster/heartbeat.h>
  34#include <cluster/nodemanager.h>
  35
  36#include <dlm/dlmapi.h>
  37
  38#define MLOG_MASK_PREFIX ML_SUPER
  39#include <cluster/masklog.h>
  40
  41#include "ocfs2.h"
  42
  43#include "alloc.h"
  44#include "heartbeat.h"
  45#include "inode.h"
  46#include "journal.h"
  47#include "vote.h"
  48
  49#include "buffer_head_io.h"
  50
  51#define OCFS2_HB_NODE_DOWN_PRI     (0x0000002)
  52#define OCFS2_HB_NODE_UP_PRI       OCFS2_HB_NODE_DOWN_PRI
  53
  54static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
  55                                            int bit);
  56static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
  57                                              int bit);
  58static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map);
  59static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
  60                                 struct ocfs2_node_map *from);
  61static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
  62                                 struct ocfs2_node_map *from);
  63
  64void ocfs2_init_node_maps(struct ocfs2_super *osb)
  65{
  66        spin_lock_init(&osb->node_map_lock);
  67        ocfs2_node_map_init(&osb->mounted_map);
  68        ocfs2_node_map_init(&osb->recovery_map);
  69        ocfs2_node_map_init(&osb->umount_map);
  70        ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
  71}
  72
  73static void ocfs2_do_node_down(int node_num,
  74                               struct ocfs2_super *osb)
  75{
  76        BUG_ON(osb->node_num == node_num);
  77
  78        mlog(0, "ocfs2: node down event for %d\n", node_num);
  79
  80        if (!osb->dlm) {
  81                /*
  82                 * No DLM means we're not even ready to participate yet.
  83                 * We check the slots after the DLM comes up, so we will
  84                 * notice the node death then.  We can safely ignore it
  85                 * here.
  86                 */
  87                return;
  88        }
  89
  90        if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) {
  91                /* If a node is in the umount map, then we've been
  92                 * expecting him to go down and we know ahead of time
  93                 * that recovery is not necessary. */
  94                ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
  95                return;
  96        }
  97
  98        ocfs2_recovery_thread(osb, node_num);
  99
 100        ocfs2_remove_node_from_vote_queues(osb, node_num);
 101}
 102
 103static void ocfs2_hb_node_down_cb(struct o2nm_node *node,
 104                                  int node_num,
 105                                  void *data)
 106{
 107        ocfs2_do_node_down(node_num, (struct ocfs2_super *) data);
 108}
 109
 110/* Called from the dlm when it's about to evict a node. We may also
 111 * get a heartbeat callback later. */
 112static void ocfs2_dlm_eviction_cb(int node_num,
 113                                  void *data)
 114{
 115        struct ocfs2_super *osb = (struct ocfs2_super *) data;
 116        struct super_block *sb = osb->sb;
 117
 118        mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n",
 119             MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num);
 120
 121        ocfs2_do_node_down(node_num, osb);
 122}
 123
 124static void ocfs2_hb_node_up_cb(struct o2nm_node *node,
 125                                int node_num,
 126                                void *data)
 127{
 128        struct ocfs2_super *osb = data;
 129
 130        BUG_ON(osb->node_num == node_num);
 131
 132        mlog(0, "node up event for %d\n", node_num);
 133        ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
 134}
 135
 136void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
 137{
 138        o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB,
 139                            ocfs2_hb_node_down_cb, osb,
 140                            OCFS2_HB_NODE_DOWN_PRI);
 141
 142        o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB,
 143                            ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI);
 144
 145        /* Not exactly a heartbeat callback, but leads to essentially
 146         * the same path so we set it up here. */
 147        dlm_setup_eviction_cb(&osb->osb_eviction_cb,
 148                              ocfs2_dlm_eviction_cb,
 149                              osb);
 150}
 151
 152/* Most functions here are just stubs for now... */
 153int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
 154{
 155        int status;
 156
 157        if (ocfs2_mount_local(osb))
 158                return 0;
 159
 160        status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
 161        if (status < 0) {
 162                mlog_errno(status);
 163                goto bail;
 164        }
 165
 166        status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
 167        if (status < 0) {
 168                mlog_errno(status);
 169                o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
 170        }
 171
 172bail:
 173        return status;
 174}
 175
 176void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
 177{
 178        if (ocfs2_mount_local(osb))
 179                return;
 180
 181        o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
 182        o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
 183}
 184
 185void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
 186{
 187        int ret;
 188        char *argv[5], *envp[3];
 189
 190        if (ocfs2_mount_local(osb))
 191                return;
 192
 193        if (!osb->uuid_str) {
 194                /* This can happen if we don't get far enough in mount... */
 195                mlog(0, "No UUID with which to stop heartbeat!\n\n");
 196                return;
 197        }
 198
 199        argv[0] = (char *)o2nm_get_hb_ctl_path();
 200        argv[1] = "-K";
 201        argv[2] = "-u";
 202        argv[3] = osb->uuid_str;
 203        argv[4] = NULL;
 204
 205        mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);
 206
 207        /* minimal command environment taken from cpu_run_sbin_hotplug */
 208        envp[0] = "HOME=/";
 209        envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 210        envp[2] = NULL;
 211
 212        ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 213        if (ret < 0)
 214                mlog_errno(ret);
 215}
 216
 217/* special case -1 for now
 218 * TODO: should *really* make sure the calling func never passes -1!!  */
 219void ocfs2_node_map_init(struct ocfs2_node_map *map)
 220{
 221        map->num_nodes = OCFS2_NODE_MAP_MAX_NODES;
 222        memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) *
 223               sizeof(unsigned long));
 224}
 225
 226static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
 227                                            int bit)
 228{
 229        set_bit(bit, map->map);
 230}
 231
 232void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
 233                            struct ocfs2_node_map *map,
 234                            int bit)
 235{
 236        if (bit==-1)
 237                return;
 238        BUG_ON(bit >= map->num_nodes);
 239        spin_lock(&osb->node_map_lock);
 240        __ocfs2_node_map_set_bit(map, bit);
 241        spin_unlock(&osb->node_map_lock);
 242}
 243
 244static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
 245                                              int bit)
 246{
 247        clear_bit(bit, map->map);
 248}
 249
 250void ocfs2_node_map_clear_bit(struct ocfs2_super *osb,
 251                              struct ocfs2_node_map *map,
 252                              int bit)
 253{
 254        if (bit==-1)
 255                return;
 256        BUG_ON(bit >= map->num_nodes);
 257        spin_lock(&osb->node_map_lock);
 258        __ocfs2_node_map_clear_bit(map, bit);
 259        spin_unlock(&osb->node_map_lock);
 260}
 261
 262int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
 263                            struct ocfs2_node_map *map,
 264                            int bit)
 265{
 266        int ret;
 267        if (bit >= map->num_nodes) {
 268                mlog(ML_ERROR, "bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
 269                BUG();
 270        }
 271        spin_lock(&osb->node_map_lock);
 272        ret = test_bit(bit, map->map);
 273        spin_unlock(&osb->node_map_lock);
 274        return ret;
 275}
 276
 277static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map)
 278{
 279        int bit;
 280        bit = find_next_bit(map->map, map->num_nodes, 0);
 281        if (bit < map->num_nodes)
 282                return 0;
 283        return 1;
 284}
 285
 286int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
 287                            struct ocfs2_node_map *map)
 288{
 289        int ret;
 290        BUG_ON(map->num_nodes == 0);
 291        spin_lock(&osb->node_map_lock);
 292        ret = __ocfs2_node_map_is_empty(map);
 293        spin_unlock(&osb->node_map_lock);
 294        return ret;
 295}
 296
 297static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
 298                                 struct ocfs2_node_map *from)
 299{
 300        BUG_ON(from->num_nodes == 0);
 301        ocfs2_node_map_init(target);
 302        __ocfs2_node_map_set(target, from);
 303}
 304
 305/* returns 1 if bit is the only bit set in target, 0 otherwise */
 306int ocfs2_node_map_is_only(struct ocfs2_super *osb,
 307                           struct ocfs2_node_map *target,
 308                           int bit)
 309{
 310        struct ocfs2_node_map temp;
 311        int ret;
 312
 313        spin_lock(&osb->node_map_lock);
 314        __ocfs2_node_map_dup(&temp, target);
 315        __ocfs2_node_map_clear_bit(&temp, bit);
 316        ret = __ocfs2_node_map_is_empty(&temp);
 317        spin_unlock(&osb->node_map_lock);
 318
 319        return ret;
 320}
 321
 322static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
 323                                 struct ocfs2_node_map *from)
 324{
 325        int num_longs, i;
 326
 327        BUG_ON(target->num_nodes != from->num_nodes);
 328        BUG_ON(target->num_nodes == 0);
 329
 330        num_longs = BITS_TO_LONGS(target->num_nodes);
 331        for (i = 0; i < num_longs; i++)
 332                target->map[i] = from->map[i];
 333}
 334
 335/* Returns whether the recovery bit was actually set - it may not be
 336 * if a node is still marked as needing recovery */
 337int ocfs2_recovery_map_set(struct ocfs2_super *osb,
 338                           int num)
 339{
 340        int set = 0;
 341
 342        spin_lock(&osb->node_map_lock);
 343
 344        __ocfs2_node_map_clear_bit(&osb->mounted_map, num);
 345
 346        if (!test_bit(num, osb->recovery_map.map)) {
 347            __ocfs2_node_map_set_bit(&osb->recovery_map, num);
 348            set = 1;
 349        }
 350
 351        spin_unlock(&osb->node_map_lock);
 352
 353        return set;
 354}
 355
 356void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
 357                              int num)
 358{
 359        ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num);
 360}
 361
 362int ocfs2_node_map_iterate(struct ocfs2_super *osb,
 363                           struct ocfs2_node_map *map,
 364                           int idx)
 365{
 366        int i = idx;
 367
 368        idx = O2NM_INVALID_NODE_NUM;
 369        spin_lock(&osb->node_map_lock);
 370        if ((i != O2NM_INVALID_NODE_NUM) &&
 371            (i >= 0) &&
 372            (i < map->num_nodes)) {
 373                while(i < map->num_nodes) {
 374                        if (test_bit(i, map->map)) {
 375                                idx = i;
 376                                break;
 377                        }
 378                        i++;
 379                }
 380        }
 381        spin_unlock(&osb->node_map_lock);
 382        return idx;
 383}
 384