linux/drivers/block/drbd/drbd_state.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_state.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

 */

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_state_change.h"

struct after_state_chg_work {
        struct drbd_work w;
        struct drbd_device *device;
        union drbd_state os;
        union drbd_state ns;
        enum chg_state_flags flags;
        struct completion *done;
        struct drbd_state_change *state_change;
};

enum sanitize_state_warnings {
        NO_WARNING,
        ABORTED_ONLINE_VERIFY,
        ABORTED_RESYNC,
        CONNECTION_LOST_NEGOTIATING,
        IMPLICITLY_UPGRADED_DISK,
        IMPLICITLY_UPGRADED_PDSK,
};

static void count_objects(struct drbd_resource *resource,
                          unsigned int *n_devices,
                          unsigned int *n_connections)
{
        struct drbd_device *device;
        struct drbd_connection *connection;
        int vnr;

        *n_devices = 0;
        *n_connections = 0;

        idr_for_each_entry(&resource->devices, device, vnr)
                (*n_devices)++;
        for_each_connection(connection, resource)
                (*n_connections)++;
}

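/*
 * Note: the snapshot below lives in a single allocation; the struct is
 * followed in memory by the device, connection and peer-device arrays, in
 * that order, so the one kfree() in forget_state_change() releases it all.
 */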
static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp)
{
        struct drbd_state_change *state_change;
        unsigned int size, n;

        size = sizeof(struct drbd_state_change) +
               n_devices * sizeof(struct drbd_device_state_change) +
               n_connections * sizeof(struct drbd_connection_state_change) +
               n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);
        state_change = kmalloc(size, gfp);
        if (!state_change)
                return NULL;
        state_change->n_devices = n_devices;
        state_change->n_connections = n_connections;
        state_change->devices = (void *)(state_change + 1);
        state_change->connections = (void *)&state_change->devices[n_devices];
        state_change->peer_devices = (void *)&state_change->connections[n_connections];
        state_change->resource->resource = NULL;
        for (n = 0; n < n_devices; n++)
                state_change->devices[n].device = NULL;
        for (n = 0; n < n_connections; n++)
                state_change->connections[n].connection = NULL;
        return state_change;
}

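/*
 * Snapshot the current ("OLD") state of a resource and all of its objects.
 * Takes a kref on the resource and on each device and connection recorded;
 * remember_new_state() fills in the NEW slots after the state change, and
 * forget_state_change() drops the references again.
 */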
struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp)
{
        struct drbd_state_change *state_change;
        struct drbd_device *device;
        unsigned int n_devices;
        struct drbd_connection *connection;
        unsigned int n_connections;
        int vnr;

        struct drbd_device_state_change *device_state_change;
        struct drbd_peer_device_state_change *peer_device_state_change;
        struct drbd_connection_state_change *connection_state_change;

        /* Caller holds the req_lock spinlock.
         * Neither the state, the device IDR, nor the connection lists can change. */
        count_objects(resource, &n_devices, &n_connections);
        state_change = alloc_state_change(n_devices, n_connections, gfp);
        if (!state_change)
                return NULL;

        kref_get(&resource->kref);
        state_change->resource->resource = resource;
        state_change->resource->role[OLD] =
                conn_highest_role(first_connection(resource));
        state_change->resource->susp[OLD] = resource->susp;
        state_change->resource->susp_nod[OLD] = resource->susp_nod;
        state_change->resource->susp_fen[OLD] = resource->susp_fen;

        connection_state_change = state_change->connections;
        for_each_connection(connection, resource) {
                kref_get(&connection->kref);
                connection_state_change->connection = connection;
                connection_state_change->cstate[OLD] =
                        connection->cstate;
                connection_state_change->peer_role[OLD] =
                        conn_highest_peer(connection);
                connection_state_change++;
        }

        device_state_change = state_change->devices;
        peer_device_state_change = state_change->peer_devices;
        idr_for_each_entry(&resource->devices, device, vnr) {
                kref_get(&device->kref);
                device_state_change->device = device;
                device_state_change->disk_state[OLD] = device->state.disk;

                /* The peer_devices for each device have to be enumerated in
                   the order of the connections. We may not use for_each_peer_device() here. */
                for_each_connection(connection, resource) {
                        struct drbd_peer_device *peer_device;

                        peer_device = conn_peer_device(connection, device->vnr);
                        peer_device_state_change->peer_device = peer_device;
                        peer_device_state_change->disk_state[OLD] =
                                device->state.pdsk;
                        peer_device_state_change->repl_state[OLD] =
                                max_t(enum drbd_conns,
                                      C_WF_REPORT_PARAMS, device->state.conn);
                        peer_device_state_change->resync_susp_user[OLD] =
                                device->state.user_isp;
                        peer_device_state_change->resync_susp_peer[OLD] =
                                device->state.peer_isp;
                        peer_device_state_change->resync_susp_dependency[OLD] =
                                device->state.aftr_isp;
                        peer_device_state_change++;
                }
                device_state_change++;
        }

        return state_change;
}

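/*
 * Fill in the NEW slots of a snapshot taken by remember_old_state().
 * Must run after the state update; a NULL snapshot (failed allocation)
 * is tolerated.
 */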
static void remember_new_state(struct drbd_state_change *state_change)
{
        struct drbd_resource_state_change *resource_state_change;
        struct drbd_resource *resource;
        unsigned int n;

        if (!state_change)
                return;

        resource_state_change = &state_change->resource[0];
        resource = resource_state_change->resource;

        resource_state_change->role[NEW] =
                conn_highest_role(first_connection(resource));
        resource_state_change->susp[NEW] = resource->susp;
        resource_state_change->susp_nod[NEW] = resource->susp_nod;
        resource_state_change->susp_fen[NEW] = resource->susp_fen;

        for (n = 0; n < state_change->n_devices; n++) {
                struct drbd_device_state_change *device_state_change =
                        &state_change->devices[n];
                struct drbd_device *device = device_state_change->device;

                device_state_change->disk_state[NEW] = device->state.disk;
        }

        for (n = 0; n < state_change->n_connections; n++) {
                struct drbd_connection_state_change *connection_state_change =
                        &state_change->connections[n];
                struct drbd_connection *connection =
                        connection_state_change->connection;

                connection_state_change->cstate[NEW] = connection->cstate;
                connection_state_change->peer_role[NEW] =
                        conn_highest_peer(connection);
        }

        for (n = 0; n < state_change->n_devices * state_change->n_connections; n++) {
                struct drbd_peer_device_state_change *peer_device_state_change =
                        &state_change->peer_devices[n];
                struct drbd_device *device =
                        peer_device_state_change->peer_device->device;
                union drbd_dev_state state = device->state;

                peer_device_state_change->disk_state[NEW] = state.pdsk;
                peer_device_state_change->repl_state[NEW] =
                        max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
                peer_device_state_change->resync_susp_user[NEW] =
                        state.user_isp;
                peer_device_state_change->resync_susp_peer[NEW] =
                        state.peer_isp;
                peer_device_state_change->resync_susp_dependency[NEW] =
                        state.aftr_isp;
        }
}

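/*
 * Duplicate every OLD value of a snapshot into its NEW slot, so that the
 * snapshot describes the current state rather than an actual transition.
 */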
void copy_old_to_new_state_change(struct drbd_state_change *state_change)
{
        struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
        unsigned int n_device, n_connection, n_peer_device, n_peer_devices;

#define OLD_TO_NEW(x) \
        (x[NEW] = x[OLD])

        OLD_TO_NEW(resource_state_change->role);
        OLD_TO_NEW(resource_state_change->susp);
        OLD_TO_NEW(resource_state_change->susp_nod);
        OLD_TO_NEW(resource_state_change->susp_fen);

        for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
                struct drbd_connection_state_change *connection_state_change =
                                &state_change->connections[n_connection];

                OLD_TO_NEW(connection_state_change->peer_role);
                OLD_TO_NEW(connection_state_change->cstate);
        }

        for (n_device = 0; n_device < state_change->n_devices; n_device++) {
                struct drbd_device_state_change *device_state_change =
                        &state_change->devices[n_device];

                OLD_TO_NEW(device_state_change->disk_state);
        }

        n_peer_devices = state_change->n_devices * state_change->n_connections;
        for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
                struct drbd_peer_device_state_change *p =
                        &state_change->peer_devices[n_peer_device];

                OLD_TO_NEW(p->disk_state);
                OLD_TO_NEW(p->repl_state);
                OLD_TO_NEW(p->resync_susp_user);
                OLD_TO_NEW(p->resync_susp_peer);
                OLD_TO_NEW(p->resync_susp_dependency);
        }

#undef OLD_TO_NEW
}

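/*
 * Release a snapshot: drop the krefs taken by remember_old_state() and free
 * the single allocation backing the whole state_change.
 */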
void forget_state_change(struct drbd_state_change *state_change)
{
        unsigned int n;

        if (!state_change)
                return;

        if (state_change->resource->resource)
                kref_put(&state_change->resource->resource->kref, drbd_destroy_resource);
        for (n = 0; n < state_change->n_devices; n++) {
                struct drbd_device *device = state_change->devices[n].device;

                if (device)
                        kref_put(&device->kref, drbd_destroy_device);
        }
        for (n = 0; n < state_change->n_connections; n++) {
                struct drbd_connection *connection =
                        state_change->connections[n].connection;

                if (connection)
                        kref_put(&connection->kref, drbd_destroy_connection);
        }
        kfree(state_change);
}

static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_device *device, union drbd_state os,
                           union drbd_state ns, enum chg_state_flags flags,
                           struct drbd_state_change *);
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
                                       union drbd_state ns, enum sanitize_state_warnings *warn);

static inline bool is_susp(union drbd_state s)
{
        return s.susp || s.susp_nod || s.susp_fen;
}

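/*
 * True iff every volume of @connection is completely unconfigured, i.e.
 * Diskless, StandAlone and Secondary.
 */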
bool conn_all_vols_unconf(struct drbd_connection *connection)
{
        struct drbd_peer_device *peer_device;
        bool rv = true;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                if (device->state.disk != D_DISKLESS ||
                    device->state.conn != C_STANDALONE ||
                    device->state.role != R_SECONDARY) {
                        rv = false;
                        break;
                }
        }
        rcu_read_unlock();

        return rv;
}

/* Unfortunately the states were not correctly ordered when they were
   defined, therefore we cannot use max_t() here. */
static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
{
        if (role1 == R_PRIMARY || role2 == R_PRIMARY)
                return R_PRIMARY;
        if (role1 == R_SECONDARY || role2 == R_SECONDARY)
                return R_SECONDARY;
        return R_UNKNOWN;
}

static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
{
        if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
                return R_UNKNOWN;
        if (role1 == R_SECONDARY || role2 == R_SECONDARY)
                return R_SECONDARY;
        return R_PRIMARY;
}

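/*
 * The conn_highest_*() / conn_lowest_*() helpers below reduce the per-volume
 * states of a connection to one aggregate value, walking the peer_devices
 * IDR under rcu_read_lock().
 */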
enum drbd_role conn_highest_role(struct drbd_connection *connection)
{
        enum drbd_role role = R_SECONDARY;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                role = max_role(role, device->state.role);
        }
        rcu_read_unlock();

        return role;
}

enum drbd_role conn_highest_peer(struct drbd_connection *connection)
{
        enum drbd_role peer = R_UNKNOWN;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                peer = max_role(peer, device->state.peer);
        }
        rcu_read_unlock();

        return peer;
}

enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection)
{
        enum drbd_disk_state disk_state = D_DISKLESS;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                disk_state = max_t(enum drbd_disk_state, disk_state, device->state.disk);
        }
        rcu_read_unlock();

        return disk_state;
}

enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection)
{
        enum drbd_disk_state disk_state = D_MASK;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
        }
        rcu_read_unlock();

        return disk_state;
}

enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection)
{
        enum drbd_disk_state disk_state = D_DISKLESS;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                disk_state = max_t(enum drbd_disk_state, disk_state, device->state.pdsk);
        }
        rcu_read_unlock();

        return disk_state;
}

enum drbd_conns conn_lowest_conn(struct drbd_connection *connection)
{
        enum drbd_conns conn = C_MASK;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                conn = min_t(enum drbd_conns, conn, device->state.conn);
        }
        rcu_read_unlock();

        return conn;
}

static bool no_peer_wf_report_params(struct drbd_connection *connection)
{
        struct drbd_peer_device *peer_device;
        int vnr;
        bool rv = true;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
                if (peer_device->device->state.conn == C_WF_REPORT_PARAMS) {
                        rv = false;
                        break;
                }
        rcu_read_unlock();

        return rv;
}

static void wake_up_all_devices(struct drbd_connection *connection)
{
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
                wake_up(&peer_device->device->state_wait);
        rcu_read_unlock();
}

/**
 * cl_wide_st_chg() - true if the state change is a cluster wide one
 * @device:     DRBD device.
 * @os:         old (current) state.
 * @ns:         new (wanted) state.
 */
static int cl_wide_st_chg(struct drbd_device *device,
                          union drbd_state os, union drbd_state ns)
{
        return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
                 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
                  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
                  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
                  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
                (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
                (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) ||
                (os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS);
}

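/* Compute the new state: clear the bits selected by @mask, then OR in @val. */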
static union drbd_state
apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
{
        union drbd_state ns;
        ns.i = (os.i & ~mask.i) | val.i;
        return ns;
}

enum drbd_state_rv
drbd_change_state(struct drbd_device *device, enum chg_state_flags f,
                  union drbd_state mask, union drbd_state val)
{
        unsigned long flags;
        union drbd_state ns;
        enum drbd_state_rv rv;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        ns = apply_mask_val(drbd_read_state(device), mask, val);
        rv = _drbd_set_state(device, ns, f, NULL);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        return rv;
}

/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @device:     DRBD device.
 * @mask:       mask of state bits to change.
 * @val:        value of new state bits.
 */
void drbd_force_state(struct drbd_device *device,
        union drbd_state mask, union drbd_state val)
{
        drbd_change_state(device, CS_HARD, mask, val);
}

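/*
 * Wait condition for the cluster-wide state change in drbd_req_state():
 * returning SS_UNKNOWN_ERROR (zero) keeps the wait_event() waiting, any
 * other value ends it (peer verdict, no need, or a local reason to fail).
 */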
static enum drbd_state_rv
_req_st_cond(struct drbd_device *device, union drbd_state mask,
             union drbd_state val)
{
        union drbd_state os, ns;
        unsigned long flags;
        enum drbd_state_rv rv;

        if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &device->flags))
                return SS_CW_SUCCESS;

        if (test_and_clear_bit(CL_ST_CHG_FAIL, &device->flags))
                return SS_CW_FAILED_BY_PEER;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        os = drbd_read_state(device);
        ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
        rv = is_valid_transition(os, ns);
        if (rv >= SS_SUCCESS)
                rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */

        if (!cl_wide_st_chg(device, os, ns))
                rv = SS_CW_NO_NEED;
        if (rv == SS_UNKNOWN_ERROR) {
                rv = is_valid_state(device, ns);
                if (rv >= SS_SUCCESS) {
                        rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
                        if (rv >= SS_SUCCESS)
                                rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
                }
        }
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        return rv;
}

/**
 * drbd_req_state() - Perform a possibly cluster-wide state change
 * @device:     DRBD device.
 * @mask:       mask of state bits to change.
 * @val:        value of new state bits.
 * @f:          flags
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_device *device, union drbd_state mask,
               union drbd_state val, enum chg_state_flags f)
{
        struct completion done;
        unsigned long flags;
        union drbd_state os, ns;
        enum drbd_state_rv rv;
        void *buffer = NULL;

        init_completion(&done);

        if (f & CS_SERIALIZE)
                mutex_lock(device->state_mutex);
        if (f & CS_INHIBIT_MD_IO)
                buffer = drbd_md_get_buffer(device, __func__);

        spin_lock_irqsave(&device->resource->req_lock, flags);
        os = drbd_read_state(device);
        ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
        rv = is_valid_transition(os, ns);
        if (rv < SS_SUCCESS) {
                spin_unlock_irqrestore(&device->resource->req_lock, flags);
                goto abort;
        }

        if (cl_wide_st_chg(device, os, ns)) {
                rv = is_valid_state(device, ns);
                if (rv == SS_SUCCESS)
                        rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
                spin_unlock_irqrestore(&device->resource->req_lock, flags);

                if (rv < SS_SUCCESS) {
                        if (f & CS_VERBOSE)
                                print_st_err(device, os, ns, rv);
                        goto abort;
                }

                if (drbd_send_state_req(first_peer_device(device), mask, val)) {
                        rv = SS_CW_FAILED_BY_PEER;
                        if (f & CS_VERBOSE)
                                print_st_err(device, os, ns, rv);
                        goto abort;
                }

                wait_event(device->state_wait,
                        (rv = _req_st_cond(device, mask, val)));

                if (rv < SS_SUCCESS) {
                        if (f & CS_VERBOSE)
                                print_st_err(device, os, ns, rv);
                        goto abort;
                }
                spin_lock_irqsave(&device->resource->req_lock, flags);
                ns = apply_mask_val(drbd_read_state(device), mask, val);
                rv = _drbd_set_state(device, ns, f, &done);
        } else {
                rv = _drbd_set_state(device, ns, f, &done);
        }

        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
                D_ASSERT(device, current != first_peer_device(device)->connection->worker.task);
                wait_for_completion(&done);
        }

abort:
        if (buffer)
                drbd_md_put_buffer(device);
        if (f & CS_SERIALIZE)
                mutex_unlock(device->state_mutex);

        return rv;
}

/**
 * _drbd_request_state() - Request a state change (with flags)
 * @device:     DRBD device.
 * @mask:       mask of state bits to change.
 * @val:        value of new state bits.
 * @f:          flags
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_device *device, union drbd_state mask,
                    union drbd_state val, enum chg_state_flags f)
{
        enum drbd_state_rv rv;

        wait_event(device->state_wait,
                   (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE);

        return rv;
}

/*
 * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
 * there is IO in-flight: the transition into D_FAILED for detach purposes
 * may get misinterpreted as actual IO error in a confused endio function.
 *
 * We wrap it all into wait_event(), to retry in case the drbd_req_state()
 * returns SS_IN_TRANSIENT_STATE.
 *
 * To avoid potential deadlock with e.g. the receiver thread trying to grab
 * drbd_md_get_buffer() while trying to get out of the "transient state", we
 * need to grab and release the meta data buffer inside of that wait_event loop.
 */
static enum drbd_state_rv
request_detach(struct drbd_device *device)
{
        return drbd_req_state(device, NS(disk, D_FAILED),
                        CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
}

int drbd_request_detach_interruptible(struct drbd_device *device)
{
        int ret, rv;

        drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
        wait_event_interruptible(device->state_wait,
                (rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
        drbd_resume_io(device);

        ret = wait_event_interruptible(device->misc_wait,
                        device->state.disk != D_FAILED);

        if (rv == SS_IS_DISKLESS)
                rv = SS_NOTHING_TO_DO;
        if (ret)
                rv = ERR_INTR;

        return rv;
}

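/*
 * Like _drbd_request_state(), but for callers that already hold
 * device->state_mutex: the mutex is dropped around each sleep so that
 * whoever is holding up the transient state can make progress.
 */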
enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
                    union drbd_state val, enum chg_state_flags f)
{
        enum drbd_state_rv rv;

        BUG_ON(f & CS_SERIALIZE);

        wait_event_cmd(device->state_wait,
                       (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE,
                       mutex_unlock(device->state_mutex),
                       mutex_lock(device->state_mutex));

        return rv;
}

static void print_st(struct drbd_device *device, const char *name, union drbd_state ns)
{
        drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
            name,
            drbd_conn_str(ns.conn),
            drbd_role_str(ns.role),
            drbd_role_str(ns.peer),
            drbd_disk_str(ns.disk),
            drbd_disk_str(ns.pdsk),
            is_susp(ns) ? 's' : 'r',
            ns.aftr_isp ? 'a' : '-',
            ns.peer_isp ? 'p' : '-',
            ns.user_isp ? 'u' : '-',
            ns.susp_fen ? 'F' : '-',
            ns.susp_nod ? 'N' : '-'
            );
}

void print_st_err(struct drbd_device *device, union drbd_state os,
                  union drbd_state ns, enum drbd_state_rv err)
{
        if (err == SS_IN_TRANSIENT_STATE)
                return;
        drbd_err(device, "State change failed: %s\n", drbd_set_st_err_str(err));
        print_st(device, " state", os);
        print_st(device, "wanted", ns);
}

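/*
 * Append a "field( old -> new ) " token to @pb for every state field that
 * changed and whose CS_DC_* bit is set in @flags; returns the number of
 * characters written.
 */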
static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
                               enum chg_state_flags flags)
{
        char *pbp = pb;

        *pbp = 0;

        if (ns.role != os.role && flags & CS_DC_ROLE)
                pbp += sprintf(pbp, "role( %s -> %s ) ",
                               drbd_role_str(os.role),
                               drbd_role_str(ns.role));
        if (ns.peer != os.peer && flags & CS_DC_PEER)
                pbp += sprintf(pbp, "peer( %s -> %s ) ",
                               drbd_role_str(os.peer),
                               drbd_role_str(ns.peer));
        if (ns.conn != os.conn && flags & CS_DC_CONN)
                pbp += sprintf(pbp, "conn( %s -> %s ) ",
                               drbd_conn_str(os.conn),
                               drbd_conn_str(ns.conn));
        if (ns.disk != os.disk && flags & CS_DC_DISK)
                pbp += sprintf(pbp, "disk( %s -> %s ) ",
                               drbd_disk_str(os.disk),
                               drbd_disk_str(ns.disk));
        if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
                pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
                               drbd_disk_str(os.pdsk),
                               drbd_disk_str(ns.pdsk));

        return pbp - pb;
}

static void drbd_pr_state_change(struct drbd_device *device, union drbd_state os, union drbd_state ns,
                                 enum chg_state_flags flags)
{
        char pb[300];
        char *pbp = pb;

        pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK);

        if (ns.aftr_isp != os.aftr_isp)
                pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
                               os.aftr_isp,
                               ns.aftr_isp);
        if (ns.peer_isp != os.peer_isp)
                pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
                               os.peer_isp,
                               ns.peer_isp);
        if (ns.user_isp != os.user_isp)
                pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
                               os.user_isp,
                               ns.user_isp);

        if (pbp != pb)
                drbd_info(device, "%s\n", pb);
}

static void conn_pr_state_change(struct drbd_connection *connection, union drbd_state os, union drbd_state ns,
                                 enum chg_state_flags flags)
{
        char pb[300];
        char *pbp = pb;

        pbp += print_state_change(pbp, os, ns, flags);

        if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP)
                pbp += sprintf(pbp, "susp( %d -> %d ) ",
                               is_susp(os),
                               is_susp(ns));

        if (pbp != pb)
                drbd_info(connection, "%s\n", pb);
}

/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @device:     DRBD device.
 * @ns:         State to consider.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_device *device, union drbd_state ns)
{
        /* See drbd_state_sw_errors in drbd_strings.c */

        enum drbd_fencing_p fp;
        enum drbd_state_rv rv = SS_SUCCESS;
        struct net_conf *nc;

        rcu_read_lock();
        fp = FP_DONT_CARE;
        if (get_ldev(device)) {
                fp = rcu_dereference(device->ldev->disk_conf)->fencing;
                put_ldev(device);
        }

        nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
        if (nc) {
                if (!nc->two_primaries && ns.role == R_PRIMARY) {
                        if (ns.peer == R_PRIMARY)
                                rv = SS_TWO_PRIMARIES;
                        else if (conn_highest_peer(first_peer_device(device)->connection) == R_PRIMARY)
                                rv = SS_O_VOL_PEER_PRI;
                }
        }

        if (rv <= 0)
                goto out; /* already found a reason to abort */
        else if (ns.role == R_SECONDARY && device->open_cnt)
                rv = SS_DEVICE_IN_USE;

        else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
                rv = SS_NO_UP_TO_DATE_DISK;

        else if (fp >= FP_RESOURCE &&
                 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
                rv = SS_PRIMARY_NOP;

        else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
                rv = SS_NO_UP_TO_DATE_DISK;

        else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
                rv = SS_NO_LOCAL_DISK;

        else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
                rv = SS_NO_REMOTE_DISK;

        else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
                rv = SS_NO_UP_TO_DATE_DISK;

        else if ((ns.conn == C_CONNECTED ||
                  ns.conn == C_WF_BITMAP_S ||
                  ns.conn == C_SYNC_SOURCE ||
                  ns.conn == C_PAUSED_SYNC_S) &&
                  ns.disk == D_OUTDATED)
                rv = SS_CONNECTED_OUTDATES;

        else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
                 (nc->verify_alg[0] == 0))
                rv = SS_NO_VERIFY_ALG;

        else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
                  first_peer_device(device)->connection->agreed_pro_version < 88)
                rv = SS_NOT_SUPPORTED;

        else if (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
                rv = SS_NO_UP_TO_DATE_DISK;

        else if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
                 ns.pdsk == D_UNKNOWN)
                rv = SS_NEED_CONNECTION;

        else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
                rv = SS_CONNECTED_OUTDATES;

out:
        rcu_read_unlock();

        return rv;
}

/**
 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
 * This function limits state transitions that may be declined by DRBD,
 * i.e. user requests (aka soft transitions).
 * @os:         old state.
 * @ns:         new state.
 * @connection: DRBD connection.
 */
static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_connection *connection)
{
        enum drbd_state_rv rv = SS_SUCCESS;

        if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
            os.conn > C_CONNECTED)
                rv = SS_RESYNC_RUNNING;

        if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
                rv = SS_ALREADY_STANDALONE;

        if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
                rv = SS_IS_DISKLESS;

        if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
                rv = SS_NO_NET_CONFIG;

        if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
                rv = SS_LOWER_THAN_OUTDATED;

        if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
                rv = SS_IN_TRANSIENT_STATE;

        /* While establishing a connection, only allow cstate to change.
           Delay/refuse role changes, detach/attach etc. (they do not touch cstate). */
        if (test_bit(STATE_SENT, &connection->flags) &&
            !((ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION) ||
              (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS)))
                rv = SS_IN_TRANSIENT_STATE;

        /* Do not promote during resync handshake triggered by "force primary".
         * This is a hack. It should really be rejected by the peer during the
         * cluster wide state change request. */
        if (os.role != R_PRIMARY && ns.role == R_PRIMARY
                && ns.pdsk == D_UP_TO_DATE
                && ns.disk != D_UP_TO_DATE && ns.disk != D_DISKLESS
                && (ns.conn <= C_WF_SYNC_UUID || ns.conn != os.conn))
                        rv = SS_IN_TRANSIENT_STATE;

        if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
                rv = SS_NEED_CONNECTION;

        if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
            ns.conn != os.conn && os.conn > C_CONNECTED)
                rv = SS_RESYNC_RUNNING;

        if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
            os.conn < C_CONNECTED)
                rv = SS_NEED_CONNECTION;

        if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
            && os.conn < C_WF_REPORT_PARAMS)
                rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

        if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED &&
            os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)
                rv = SS_OUTDATE_WO_CONN;

        return rv;
}

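/*
 * Validate only the connection-state part of a transition; helper for
 * is_valid_transition().
 */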
static enum drbd_state_rv
is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
{
        /* no change -> nothing to do, at least for the connection part */
        if (oc == nc)
                return SS_NOTHING_TO_DO;

        /* disconnect of an unconfigured connection does not make sense */
        if (oc == C_STANDALONE && nc == C_DISCONNECTING)
                return SS_ALREADY_STANDALONE;

        /* from C_STANDALONE, we start with C_UNCONNECTED */
        if (oc == C_STANDALONE && nc != C_UNCONNECTED)
                return SS_NEED_CONNECTION;

        /* When establishing a connection we need to go through WF_REPORT_PARAMS!
           Necessary to do the right thing upon invalidate-remote on a disconnected resource */
        if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED)
                return SS_NEED_CONNECTION;

        /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
        if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
                return SS_IN_TRANSIENT_STATE;

        /* After C_DISCONNECTING only C_STANDALONE may follow */
        if (oc == C_DISCONNECTING && nc != C_STANDALONE)
                return SS_IN_TRANSIENT_STATE;

        return SS_SUCCESS;
}

/**
 * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
 * This limits hard state transitions. Hard state transitions are facts that are
 * imposed on DRBD by the environment, e.g. the disk broke or the network broke
 * down. But even those hard state transitions are not allowed to do everything.
 * @ns:         new state.
 * @os:         old state.
 */
static enum drbd_state_rv
is_valid_transition(union drbd_state os, union drbd_state ns)
{
        enum drbd_state_rv rv;

        rv = is_valid_conn_transition(os.conn, ns.conn);

        /* we cannot fail (again) if we already detached */
        if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
                rv = SS_IS_DISKLESS;

        return rv;
}

static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_state_warnings warn)
{
        static const char *msg_table[] = {
                [NO_WARNING] = "",
                [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
                [ABORTED_RESYNC] = "Resync aborted.",
                [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
                [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
                [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
        };

        if (warn != NO_WARNING)
                drbd_warn(device, "%s\n", msg_table[warn]);
}

/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @device:     DRBD device.
 * @os:         old state.
 * @ns:         new state.
 * @warn:       placeholder for returned state warning.
 *
 * When we lose the connection, we have to set the state of the peer's disk
 * (pdsk) to D_UNKNOWN. This rule and many more along those lines are in this
 * function.
 */
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
                                       union drbd_state ns, enum sanitize_state_warnings *warn)
{
        enum drbd_fencing_p fp;
        enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

        if (warn)
                *warn = NO_WARNING;

        fp = FP_DONT_CARE;
        if (get_ldev(device)) {
                rcu_read_lock();
                fp = rcu_dereference(device->ldev->disk_conf)->fencing;
                rcu_read_unlock();
                put_ldev(device);
        }

        /* Implications from connection to peer and peer_isp */
        if (ns.conn < C_CONNECTED) {
                ns.peer_isp = 0;
                ns.peer = R_UNKNOWN;
                if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
                        ns.pdsk = D_UNKNOWN;
        }

        /* Clear the aftr_isp when becoming unconfigured */
        if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
                ns.aftr_isp = 0;

        /* An implication of the disk states onto the connection state */
        /* Abort resync if a disk fails/detaches */
        if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
                if (warn)
                        *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
                                ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
                ns.conn = C_CONNECTED;
        }

        /* Connection breaks down before we finished "Negotiating" */
        if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
            get_ldev_if_state(device, D_NEGOTIATING)) {
                if (device->ed_uuid == device->ldev->md.uuid[UI_CURRENT]) {
                        ns.disk = device->new_state_tmp.disk;
                        ns.pdsk = device->new_state_tmp.pdsk;
                } else {
                        if (warn)
                                *warn = CONNECTION_LOST_NEGOTIATING;
                        ns.disk = D_DISKLESS;
                        ns.pdsk = D_UNKNOWN;
                }
                put_ldev(device);
        }

        /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
        if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
                if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
                        ns.disk = D_UP_TO_DATE;
                if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
                        ns.pdsk = D_UP_TO_DATE;
        }

        /* Implications of the connection state on the disk states */
        disk_min = D_DISKLESS;
        disk_max = D_UP_TO_DATE;
        pdsk_min = D_INCONSISTENT;
        pdsk_max = D_UNKNOWN;
        switch ((enum drbd_conns)ns.conn) {
        case C_WF_BITMAP_T:
        case C_PAUSED_SYNC_T:
        case C_STARTING_SYNC_T:
        case C_WF_SYNC_UUID:
        case C_BEHIND:
                disk_min = D_INCONSISTENT;
                disk_max = D_OUTDATED;
                pdsk_min = D_UP_TO_DATE;
                pdsk_max = D_UP_TO_DATE;
                break;
        case C_VERIFY_S:
        case C_VERIFY_T:
                disk_min = D_UP_TO_DATE;
                disk_max = D_UP_TO_DATE;
                pdsk_min = D_UP_TO_DATE;
                pdsk_max = D_UP_TO_DATE;
                break;
        case C_CONNECTED:
                disk_min = D_DISKLESS;
                disk_max = D_UP_TO_DATE;
                pdsk_min = D_DISKLESS;
                pdsk_max = D_UP_TO_DATE;
                break;
        case C_WF_BITMAP_S:
        case C_PAUSED_SYNC_S:
        case C_STARTING_SYNC_S:
        case C_AHEAD:
                disk_min = D_UP_TO_DATE;
                disk_max = D_UP_TO_DATE;
                pdsk_min = D_INCONSISTENT;
                pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
                break;
        case C_SYNC_TARGET:
                disk_min = D_INCONSISTENT;
                disk_max = D_INCONSISTENT;
                pdsk_min = D_UP_TO_DATE;
                pdsk_max = D_UP_TO_DATE;
                break;
        case C_SYNC_SOURCE:
                disk_min = D_UP_TO_DATE;
                disk_max = D_UP_TO_DATE;
                pdsk_min = D_INCONSISTENT;
                pdsk_max = D_INCONSISTENT;
                break;
        case C_STANDALONE:
        case C_DISCONNECTING:
        case C_UNCONNECTED:
        case C_TIMEOUT:
        case C_BROKEN_PIPE:
        case C_NETWORK_FAILURE:
        case C_PROTOCOL_ERROR:
        case C_TEAR_DOWN:
        case C_WF_CONNECTION:
        case C_WF_REPORT_PARAMS:
        case C_MASK:
                break;
        }
        if (ns.disk > disk_max)
                ns.disk = disk_max;

        if (ns.disk < disk_min) {
                if (warn)
                        *warn = IMPLICITLY_UPGRADED_DISK;
                ns.disk = disk_min;
        }
        if (ns.pdsk > pdsk_max)
                ns.pdsk = pdsk_max;

        if (ns.pdsk < pdsk_min) {
                if (warn)
                        *warn = IMPLICITLY_UPGRADED_PDSK;
                ns.pdsk = pdsk_min;
        }

        if (fp == FP_STONITH &&
            (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
            !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
                ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

        if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
            (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
            !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
                ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

        if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
                if (ns.conn == C_SYNC_SOURCE)
                        ns.conn = C_PAUSED_SYNC_S;
                if (ns.conn == C_SYNC_TARGET)
                        ns.conn = C_PAUSED_SYNC_T;
        } else {
                if (ns.conn == C_PAUSED_SYNC_S)
                        ns.conn = C_SYNC_SOURCE;
                if (ns.conn == C_PAUSED_SYNC_T)
                        ns.conn = C_SYNC_TARGET;
        }

        return ns;
}

void drbd_resume_al(struct drbd_device *device)
{
        if (test_and_clear_bit(AL_SUSPENDED, &device->flags))
                drbd_info(device, "Resumed AL updates\n");
}

/* helper for _drbd_set_state */
static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
{
        if (first_peer_device(device)->connection->agreed_pro_version < 90)
                device->ov_start_sector = 0;
        device->rs_total = drbd_bm_bits(device);
        device->ov_position = 0;
        if (cs == C_VERIFY_T) {
                /* starting online verify from an arbitrary position
                 * does not fit well into the existing protocol.
                 * on C_VERIFY_T, we initialize ov_left and friends
                 * implicitly in receive_DataRequest once the
                 * first P_OV_REQUEST is received */
                device->ov_start_sector = ~(sector_t)0;
        } else {
                unsigned long bit = BM_SECT_TO_BIT(device->ov_start_sector);
                if (bit >= device->rs_total) {
                        device->ov_start_sector =
                                BM_BIT_TO_SECT(device->rs_total - 1);
                        device->rs_total = 1;
                } else
                        device->rs_total -= bit;
                device->ov_position = device->ov_start_sector;
        }
        device->ov_left = device->rs_total;
}

/**
 * _drbd_set_state() - Set a new DRBD state
 * @device:     DRBD device.
 * @ns:         new state.
 * @flags:      Flags
 * @done:       Optional completion, that will get completed after after_state_ch() has finished
 *
 * Caller needs to hold req_lock. Do not call directly.
 */
enum drbd_state_rv
_drbd_set_state(struct drbd_device *device, union drbd_state ns,
                enum chg_state_flags flags, struct completion *done)
{
        struct drbd_peer_device *peer_device = first_peer_device(device);
        struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
        union drbd_state os;
        enum drbd_state_rv rv = SS_SUCCESS;
        enum sanitize_state_warnings ssw;
        struct after_state_chg_work *ascw;
        struct drbd_state_change *state_change;

        os = drbd_read_state(device);

        ns = sanitize_state(device, os, ns, &ssw);
        if (ns.i == os.i)
                return SS_NOTHING_TO_DO;

        rv = is_valid_transition(os, ns);
        if (rv < SS_SUCCESS)
                return rv;

        if (!(flags & CS_HARD)) {
                /* pre-state-change checks; only look at ns */
                /* See drbd_state_sw_errors in drbd_strings.c */

                rv = is_valid_state(device, ns);
                if (rv < SS_SUCCESS) {
                        /* If the old state was illegal as well, then let
                           this happen...*/

                        if (is_valid_state(device, os) == rv)
                                rv = is_valid_soft_transition(os, ns, connection);
                } else
                        rv = is_valid_soft_transition(os, ns, connection);
        }

        if (rv < SS_SUCCESS) {
                if (flags & CS_VERBOSE)
                        print_st_err(device, os, ns, rv);
                return rv;
        }

        print_sanitize_warnings(device, ssw);

        drbd_pr_state_change(device, os, ns, flags);

        /* Display changes to the susp* flags that were caused by the call to
           sanitize_state(). Only display it here if we were not called from
           _conn_request_state() */
        if (!(flags & CS_DC_SUSP))
                conn_pr_state_change(connection, os, ns,
                                     (flags & ~CS_DC_MASK) | CS_DC_SUSP);

        /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
         * on the ldev here, to be sure the transition to D_DISKLESS (and the
         * eventual drbd_ldev_destroy()) won't happen before our corresponding
         * after_state_ch works run, where we put_ldev again. */
        if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
            (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
                atomic_inc(&device->local_cnt);

        if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
                clear_bit(RS_DONE, &device->flags);

        /* FIXME: Have any flags been set earlier in this function already? */
        state_change = remember_old_state(device->resource, GFP_ATOMIC);

        /* changes to local_cnt and device flags should be visible before
         * changes to state, which again should be visible before anything else
         * depending on that change happens. */
        smp_wmb();
        device->state.i = ns.i;
        device->resource->susp = ns.susp;
        device->resource->susp_nod = ns.susp_nod;
        device->resource->susp_fen = ns.susp_fen;
        smp_wmb();

        remember_new_state(state_change);

        /* put replicated vs not-replicated requests in separate epochs */
        if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
            drbd_should_do_remote((union drbd_dev_state)ns.i))
                start_new_tl_epoch(connection);

        if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
                drbd_print_uuids(device, "attached to UUIDs");

        /* Wake up role changes that were delayed while the connection was being established */
        if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
            no_peer_wf_report_params(connection)) {
                clear_bit(STATE_SENT, &connection->flags);
                wake_up_all_devices(connection);
        }

        wake_up(&device->misc_wait);
        wake_up(&device->state_wait);
        wake_up(&connection->ping_wait);

        /* Aborted verify run, or we reached the stop sector.
         * Log the last position, unless end-of-device. */
        if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
            ns.conn <= C_CONNECTED) {
                device->ov_start_sector =
                        BM_BIT_TO_SECT(drbd_bm_bits(device) - device->ov_left);
                if (device->ov_left)
                        drbd_info(device, "Online Verify reached sector %llu\n",
                                (unsigned long long)device->ov_start_sector);
        }

        if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
            (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
                drbd_info(device, "Syncer continues.\n");
                device->rs_paused += (long)jiffies
                                  -(long)device->rs_mark_time[device->rs_last_mark];
                if (ns.conn == C_SYNC_TARGET)
                        mod_timer(&device->resync_timer, jiffies);
        }

        if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
            (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
                drbd_info(device, "Resync suspended\n");
                device->rs_mark_time[device->rs_last_mark] = jiffies;
        }

        if (os.conn == C_CONNECTED &&
            (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
                unsigned long now = jiffies;
                int i;

                set_ov_position(device, ns.conn);
                device->rs_start = now;
                device->rs_last_sect_ev = 0;
                device->ov_last_oos_size = 0;
                device->ov_last_oos_start = 0;

                for (i = 0; i < DRBD_SYNC_MARKS; i++) {
                        device->rs_mark_left[i] = device->ov_left;
                        device->rs_mark_time[i] = now;
                }

                drbd_rs_controller_reset(device);

                if (ns.conn == C_VERIFY_S) {
                        drbd_info(device, "Starting Online Verify from sector %llu\n",
                                        (unsigned long long)device->ov_position);
                        mod_timer(&device->resync_timer, jiffies);
                }
        }

        if (get_ldev(device)) {
                u32 mdf = device->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
                                                 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
                                                 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

                mdf &= ~MDF_AL_CLEAN;
                if (test_bit(CRASHED_PRIMARY, &device->flags))
                        mdf |= MDF_CRASHED_PRIMARY;
                if (device->state.role == R_PRIMARY ||
                    (device->state.pdsk < D_INCONSISTENT && device->state.peer == R_PRIMARY))
                        mdf |= MDF_PRIMARY_IND;
                if (device->state.conn > C_WF_REPORT_PARAMS)
                        mdf |= MDF_CONNECTED_IND;
                if (device->state.disk > D_INCONSISTENT)
                        mdf |= MDF_CONSISTENT;
                if (device->state.disk > D_OUTDATED)
                        mdf |= MDF_WAS_UP_TO_DATE;
                if (device->state.pdsk <= D_OUTDATED && device->state.pdsk >= D_INCONSISTENT)
                        mdf |= MDF_PEER_OUT_DATED;
                if (mdf != device->ldev->md.flags) {
                        device->ldev->md.flags = mdf;
                        drbd_md_mark_dirty(device);
                }
                if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
                        drbd_set_ed_uuid(device, device->ldev->md.uuid[UI_CURRENT]);
1435                put_ldev(device);
1436        }
1437
1438        /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider resyncing */
1439        if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
1440            os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
1441                set_bit(CONSIDER_RESYNC, &device->flags);
1442
1443        /* Receiver should clean up itself */
1444        if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
1445                drbd_thread_stop_nowait(&connection->receiver);
1446
1447        /* Now that the receiver has finished cleaning up after itself, it should die */
1448        if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
1449                drbd_thread_stop_nowait(&connection->receiver);
1450
1451        /* Upon network failure, we need to restart the receiver. */
1452        if (os.conn > C_WF_CONNECTION &&
1453            ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1454                drbd_thread_restart_nowait(&connection->receiver);
1455
1456        /* Resume AL writing if we get a connection */
1457        if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1458                drbd_resume_al(device);
1459                connection->connect_cnt++;
1460        }
1461
1462        /* remember last attach time so request_timer_fn() won't
1463         * kill newly established sessions while we are still trying to thaw
1464         * previously frozen IO */
1465        if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1466            ns.disk > D_NEGOTIATING)
1467                device->last_reattach_jif = jiffies;
1468
1469        ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1470        if (ascw) {
1471                ascw->os = os;
1472                ascw->ns = ns;
1473                ascw->flags = flags;
1474                ascw->w.cb = w_after_state_ch;
1475                ascw->device = device;
1476                ascw->done = done;
1477                ascw->state_change = state_change;
1478                drbd_queue_work(&connection->sender_work,
1479                                &ascw->w);
1480        } else {
1481                drbd_err(device, "Could not kmalloc an ascw\n");
1482        }
1483
1484        return rv;
1485}
1486
1487static int w_after_state_ch(struct drbd_work *w, int unused)
1488{
1489        struct after_state_chg_work *ascw =
1490                container_of(w, struct after_state_chg_work, w);
1491        struct drbd_device *device = ascw->device;
1492
1493        after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change);
1494        forget_state_change(ascw->state_change);
1495        if (ascw->flags & CS_WAIT_COMPLETE)
1496                complete(ascw->done);
1497        kfree(ascw);
1498
1499        return 0;
1500}
1501
1502static void abw_start_sync(struct drbd_device *device, int rv)
1503{
1504        if (rv) {
1505                drbd_err(device, "Writing the bitmap failed, not starting resync.\n");
1506                _drbd_request_state(device, NS(conn, C_CONNECTED), CS_VERBOSE);
1507                return;
1508        }
1509
1510        switch (device->state.conn) {
1511        case C_STARTING_SYNC_T:
1512                _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
1513                break;
1514        case C_STARTING_SYNC_S:
1515                drbd_start_resync(device, C_SYNC_SOURCE);
1516                break;
1517        }
1518}
1519
1520int drbd_bitmap_io_from_worker(struct drbd_device *device,
1521                int (*io_fn)(struct drbd_device *),
1522                char *why, enum bm_flag flags)
1523{
1524        int rv;
1525
1526        D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
1527
1528        /* open coded non-blocking drbd_suspend_io(device); */
1529        atomic_inc(&device->suspend_cnt);
1530
1531        drbd_bm_lock(device, why, flags);
1532        rv = io_fn(device);
1533        drbd_bm_unlock(device);
1534
1535        drbd_resume_io(device);
1536
1537        return rv;
1538}
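/*
 * Example usage, as in after_state_ch() further down in this file:
 *
 *      drbd_bitmap_io_from_worker(device, &drbd_bm_write,
 *                      "demote", BM_LOCKED_TEST_ALLOWED);
 *
 * The caller must already run on the worker thread (see the D_ASSERT
 * above); the function suspends application IO without blocking, runs
 * io_fn under the bitmap lock, resumes IO, and returns io_fn's result.
 */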
1539
1540int notify_resource_state_change(struct sk_buff *skb,
1541                                  unsigned int seq,
1542                                  struct drbd_resource_state_change *resource_state_change,
1543                                  enum drbd_notification_type type)
1544{
1545        struct drbd_resource *resource = resource_state_change->resource;
1546        struct resource_info resource_info = {
1547                .res_role = resource_state_change->role[NEW],
1548                .res_susp = resource_state_change->susp[NEW],
1549                .res_susp_nod = resource_state_change->susp_nod[NEW],
1550                .res_susp_fen = resource_state_change->susp_fen[NEW],
1551        };
1552
1553        return notify_resource_state(skb, seq, resource, &resource_info, type);
1554}
1555
1556int notify_connection_state_change(struct sk_buff *skb,
1557                                    unsigned int seq,
1558                                    struct drbd_connection_state_change *connection_state_change,
1559                                    enum drbd_notification_type type)
1560{
1561        struct drbd_connection *connection = connection_state_change->connection;
1562        struct connection_info connection_info = {
1563                .conn_connection_state = connection_state_change->cstate[NEW],
1564                .conn_role = connection_state_change->peer_role[NEW],
1565        };
1566
1567        return notify_connection_state(skb, seq, connection, &connection_info, type);
1568}
1569
1570int notify_device_state_change(struct sk_buff *skb,
1571                                unsigned int seq,
1572                                struct drbd_device_state_change *device_state_change,
1573                                enum drbd_notification_type type)
1574{
1575        struct drbd_device *device = device_state_change->device;
1576        struct device_info device_info = {
1577                .dev_disk_state = device_state_change->disk_state[NEW],
1578        };
1579
1580        return notify_device_state(skb, seq, device, &device_info, type);
1581}
1582
1583int notify_peer_device_state_change(struct sk_buff *skb,
1584                                     unsigned int seq,
1585                                     struct drbd_peer_device_state_change *p,
1586                                     enum drbd_notification_type type)
1587{
1588        struct drbd_peer_device *peer_device = p->peer_device;
1589        struct peer_device_info peer_device_info = {
1590                .peer_repl_state = p->repl_state[NEW],
1591                .peer_disk_state = p->disk_state[NEW],
1592                .peer_resync_susp_user = p->resync_susp_user[NEW],
1593                .peer_resync_susp_peer = p->resync_susp_peer[NEW],
1594                .peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
1595        };
1596
1597        return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
1598}
1599
1600static void broadcast_state_change(struct drbd_state_change *state_change)
1601{
1602        struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
1603        bool resource_state_has_changed;
1604        unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
1605        int (*last_func)(struct sk_buff *, unsigned int, void *,
1606                          enum drbd_notification_type) = NULL;
1607        void *last_arg = NULL;
1608
1609#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
1610#define FINAL_STATE_CHANGE(type) \
1611        ({ if (last_func) \
1612                last_func(NULL, 0, last_arg, type); \
1613        })
1614#define REMEMBER_STATE_CHANGE(func, arg, type) \
1615        ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
1616           last_func = (typeof(last_func))func; \
1617           last_arg = arg; \
1618         })
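        /*
         * Illustrative trace of the macros above (notify_A/notify_B are
         * hypothetical): they coalesce a batch of notifications so that
         * only the very last one goes out without NOTIFY_CONTINUES.
         *
         *      REMEMBER_STATE_CHANGE(notify_A, a, NOTIFY_CHANGE);
         *              // nothing flushed yet, A is remembered
         *      REMEMBER_STATE_CHANGE(notify_B, b, NOTIFY_CHANGE);
         *              // flushes A as NOTIFY_CHANGE | NOTIFY_CONTINUES
         *      FINAL_STATE_CHANGE(NOTIFY_CHANGE);
         *              // flushes B as plain NOTIFY_CHANGE
         */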
1619
1620        mutex_lock(&notification_mutex);
1621
1622        resource_state_has_changed =
1623            HAS_CHANGED(resource_state_change->role) ||
1624            HAS_CHANGED(resource_state_change->susp) ||
1625            HAS_CHANGED(resource_state_change->susp_nod) ||
1626            HAS_CHANGED(resource_state_change->susp_fen);
1627
1628        if (resource_state_has_changed)
1629                REMEMBER_STATE_CHANGE(notify_resource_state_change,
1630                                      resource_state_change, NOTIFY_CHANGE);
1631
1632        for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
1633                struct drbd_connection_state_change *connection_state_change =
1634                                &state_change->connections[n_connection];
1635
1636                if (HAS_CHANGED(connection_state_change->peer_role) ||
1637                    HAS_CHANGED(connection_state_change->cstate))
1638                        REMEMBER_STATE_CHANGE(notify_connection_state_change,
1639                                              connection_state_change, NOTIFY_CHANGE);
1640        }
1641
1642        for (n_device = 0; n_device < state_change->n_devices; n_device++) {
1643                struct drbd_device_state_change *device_state_change =
1644                        &state_change->devices[n_device];
1645
1646                if (HAS_CHANGED(device_state_change->disk_state))
1647                        REMEMBER_STATE_CHANGE(notify_device_state_change,
1648                                              device_state_change, NOTIFY_CHANGE);
1649        }
1650
1651        n_peer_devices = state_change->n_devices * state_change->n_connections;
1652        for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
1653                struct drbd_peer_device_state_change *p =
1654                        &state_change->peer_devices[n_peer_device];
1655
1656                if (HAS_CHANGED(p->disk_state) ||
1657                    HAS_CHANGED(p->repl_state) ||
1658                    HAS_CHANGED(p->resync_susp_user) ||
1659                    HAS_CHANGED(p->resync_susp_peer) ||
1660                    HAS_CHANGED(p->resync_susp_dependency))
1661                        REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
1662                                              p, NOTIFY_CHANGE);
1663        }
1664
1665        FINAL_STATE_CHANGE(NOTIFY_CHANGE);
1666        mutex_unlock(&notification_mutex);
1667
1668#undef HAS_CHANGED
1669#undef FINAL_STATE_CHANGE
1670#undef REMEMBER_STATE_CHANGE
1671}
1672
1673/* takes old and new peer disk state */
1674static bool lost_contact_to_peer_data(enum drbd_disk_state os, enum drbd_disk_state ns)
1675{
1676        if ((os >= D_INCONSISTENT && os != D_UNKNOWN && os != D_OUTDATED)
1677        &&  (ns < D_INCONSISTENT || ns == D_UNKNOWN || ns == D_OUTDATED))
1678                return true;
1679
1680        /* Scenario, starting with normal operation
1681         * Connected Primary/Secondary UpToDate/UpToDate
1682         * NetworkFailure Primary/Unknown UpToDate/DUnknown (frozen)
1683         * ...
1684         * Connected Primary/Secondary UpToDate/Diskless (resumed; needs to bump uuid!)
1685         */
1686        if (os == D_UNKNOWN
1687        &&  (ns == D_DISKLESS || ns == D_FAILED || ns == D_OUTDATED))
1688                return true;
1689
1690        return false;
1691}
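/*
 * Illustrative examples of concrete (os, ns) pairs:
 *
 *      lost_contact_to_peer_data(D_UP_TO_DATE, D_UNKNOWN)    -> true
 *      lost_contact_to_peer_data(D_UNKNOWN, D_DISKLESS)      -> true
 *              (the frozen/resumed scenario described above)
 *      lost_contact_to_peer_data(D_UP_TO_DATE, D_UP_TO_DATE) -> false
 */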
1692
1693/**
1694 * after_state_ch() - Perform after state change actions that may sleep
1695 * @device:     DRBD device.
1696 * @os:         old state.
1697 * @ns:         new state.
1698 * @flags:      State change flags (enum chg_state_flags)
1699 * @state_change: state change to broadcast
1700 */
1701static void after_state_ch(struct drbd_device *device, union drbd_state os,
1702                           union drbd_state ns, enum chg_state_flags flags,
1703                           struct drbd_state_change *state_change)
1704{
1705        struct drbd_resource *resource = device->resource;
1706        struct drbd_peer_device *peer_device = first_peer_device(device);
1707        struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1708        struct sib_info sib;
1709
1710        broadcast_state_change(state_change);
1711
1712        sib.sib_reason = SIB_STATE_CHANGE;
1713        sib.os = os;
1714        sib.ns = ns;
1715
1716        if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
1717        &&  (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
1718                clear_bit(CRASHED_PRIMARY, &device->flags);
1719                if (device->p_uuid)
1720                        device->p_uuid[UI_FLAGS] &= ~((u64)2);
1721        }
1722
1723        /* Inform userspace about the change... */
1724        drbd_bcast_event(device, &sib);
1725
1726        if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
1727            (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
1728                drbd_khelper(device, "pri-on-incon-degr");
1729
1730        /* Here we have the actions that are performed after a
1731           state change. This function might sleep */
1732
1733        if (ns.susp_nod) {
1734                enum drbd_req_event what = NOTHING;
1735
1736                spin_lock_irq(&device->resource->req_lock);
1737                if (os.conn < C_CONNECTED && conn_lowest_conn(connection) >= C_CONNECTED)
1738                        what = RESEND;
1739
1740                if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1741                    conn_lowest_disk(connection) == D_UP_TO_DATE)
1742                        what = RESTART_FROZEN_DISK_IO;
1743
1744                if (resource->susp_nod && what != NOTHING) {
1745                        _tl_restart(connection, what);
1746                        _conn_request_state(connection,
1747                                            (union drbd_state) { { .susp_nod = 1 } },
1748                                            (union drbd_state) { { .susp_nod = 0 } },
1749                                            CS_VERBOSE);
1750                }
1751                spin_unlock_irq(&device->resource->req_lock);
1752        }
1753
1754        if (ns.susp_fen) {
1755                spin_lock_irq(&device->resource->req_lock);
1756                if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
1757                        /* case2: The connection was established again: */
1758                        struct drbd_peer_device *peer_device;
1759                        int vnr;
1760
1761                        rcu_read_lock();
1762                        idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1763                                clear_bit(NEW_CUR_UUID, &peer_device->device->flags);
1764                        rcu_read_unlock();
1765
1766                        /* We should actively create a new uuid, _before_
1767                         * we resume/resend, if the peer is diskless
1768                         * (recovery from a multiple error scenario).
1769                         * Currently, this happens with a slight delay
1770                         * below when checking lost_contact_to_peer_data() ...
1771                         */
1772                        _tl_restart(connection, RESEND);
1773                        _conn_request_state(connection,
1774                                            (union drbd_state) { { .susp_fen = 1 } },
1775                                            (union drbd_state) { { .susp_fen = 0 } },
1776                                            CS_VERBOSE);
1777                }
1778                spin_unlock_irq(&device->resource->req_lock);
1779        }
1780
1781        /* Became sync source.  With protocol >= 96, we still need to send out
1782         * the sync uuid now. Need to do that before any drbd_send_state, or
1783         * the other side may go "paused sync" before receiving the sync uuids,
1784         * which is unexpected. */
1785        if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1786            (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
1787            connection->agreed_pro_version >= 96 && get_ldev(device)) {
1788                drbd_gen_and_send_sync_uuid(peer_device);
1789                put_ldev(device);
1790        }
1791
1792        /* Do not change the order of the if above and the two below... */
1793        if (os.pdsk == D_DISKLESS &&
1794            ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {      /* attach on the peer */
1795                /* we probably will start a resync soon.
1796                 * make sure those things are properly reset. */
1797                device->rs_total = 0;
1798                device->rs_failed = 0;
1799                atomic_set(&device->rs_pending_cnt, 0);
1800                drbd_rs_cancel_all(device);
1801
1802                drbd_send_uuids(peer_device);
1803                drbd_send_state(peer_device, ns);
1804        }
1805        /* No point in queuing send_bitmap if we don't have a connection
1806         * anymore, so check also the _current_ state, not only the new state
1807         * at the time this work was queued. */
1808        if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1809            device->state.conn == C_WF_BITMAP_S)
1810                drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL,
1811                                "send_bitmap (WFBitMapS)",
1812                                BM_LOCKED_TEST_ALLOWED);
1813
1814        /* Lost contact to peer's copy of the data */
1815        if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) {
1816                if (get_ldev(device)) {
1817                        if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
1818                            device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1819                                if (drbd_suspended(device)) {
1820                                        set_bit(NEW_CUR_UUID, &device->flags);
1821                                } else {
1822                                        drbd_uuid_new_current(device);
1823                                        drbd_send_uuids(peer_device);
1824                                }
1825                        }
1826                        put_ldev(device);
1827                }
1828        }
1829
1830        if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
1831                if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
1832                    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1833                        drbd_uuid_new_current(device);
1834                        drbd_send_uuids(peer_device);
1835                }
1836                /* D_DISKLESS Peer becomes secondary */
1837                if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1838                        /* We may still be Primary ourselves.
1839                         * No harm done if the bitmap still changes,
1840                         * redirtied pages will follow later. */
1841                        drbd_bitmap_io_from_worker(device, &drbd_bm_write,
1842                                "demote diskless peer", BM_LOCKED_SET_ALLOWED);
1843                put_ldev(device);
1844        }
1845
1846        /* Write out all changed bits on demote.
1847         * Though, no need to do that just yet
1848         * if there is still a resync going on */
1849        if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1850                device->state.conn <= C_CONNECTED && get_ldev(device)) {
1851                /* No changes to the bitmap expected this time, so assert that,
1852                 * even though no harm was done if it did change. */
1853                drbd_bitmap_io_from_worker(device, &drbd_bm_write,
1854                                "demote", BM_LOCKED_TEST_ALLOWED);
1855                put_ldev(device);
1856        }
1857
1858        /* Last part of the attaching process ... */
1859        if (ns.conn >= C_CONNECTED &&
1860            os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
1861                drbd_send_sizes(peer_device, 0, 0);  /* to start sync... */
1862                drbd_send_uuids(peer_device);
1863                drbd_send_state(peer_device, ns);
1864        }
1865
1866        /* We want to pause/continue resync, tell peer. */
1867        if (ns.conn >= C_CONNECTED &&
1868             ((os.aftr_isp != ns.aftr_isp) ||
1869              (os.user_isp != ns.user_isp)))
1870                drbd_send_state(peer_device, ns);
1871
1872        /* In case one of the isp bits got set, suspend other devices. */
1873        if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1874            (ns.aftr_isp || ns.peer_isp || ns.user_isp))
1875                suspend_other_sg(device);
1876
1877        /* Make sure the peer gets informed about any state
1878           changes (ISP bits) that happened while we were in WFReportParams. */
1879        if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1880                drbd_send_state(peer_device, ns);
1881
1882        if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1883                drbd_send_state(peer_device, ns);
1884
1885        /* We are in the process of starting a full sync... */
1886        if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1887            (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
1888                /* no other bitmap changes expected during this phase */
1889                drbd_queue_bitmap_io(device,
1890                        &drbd_bmio_set_n_write, &abw_start_sync,
1891                        "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
1892
1893        /* first half of local IO error, failure to attach,
1894         * or administrative detach */
1895        if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1896                enum drbd_io_error_p eh = EP_PASS_ON;
1897                int was_io_error = 0;
1898                /* corresponding get_ldev was in _drbd_set_state, to serialize
1899                 * our cleanup here with the transition to D_DISKLESS.
1900                 * But it is still not safe to dereference ldev here, since
1901                 * we might come from a failed Attach before ldev was set. */
1902                if (device->ldev) {
1903                        rcu_read_lock();
1904                        eh = rcu_dereference(device->ldev->disk_conf)->on_io_error;
1905                        rcu_read_unlock();
1906
1907                        was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);
1908
1909                        /* Intentionally call this handler first, before drbd_send_state().
1910                         * See: 2932204 drbd: call local-io-error handler early
1911                         * People may choose to hard-reset the box from this handler.
1912                         * It is useful if this looks like a "regular node crash". */
1913                        if (was_io_error && eh == EP_CALL_HELPER)
1914                                drbd_khelper(device, "local-io-error");
1915
1916                        /* Immediately allow completion of all application IO,
1917                         * that waits for completion from the local disk,
1918                         * if this was a force-detach due to disk_timeout
1919                         * or administrator request (drbdsetup detach --force).
1920                         * Do NOT abort otherwise.
1921                         * Aborting local requests may cause serious problems,
1922                         * if requests are completed to upper layers already,
1923                         * and then later the already submitted local bio completes.
1924                         * This can cause DMA into former bio pages that meanwhile
1925                         * have been re-used for other things.
1926                         * So aborting local requests may cause crashes,
1927                         * or even worse, silent data corruption.
1928                         */
1929                        if (test_and_clear_bit(FORCE_DETACH, &device->flags))
1930                                tl_abort_disk_io(device);
1931
1932                        /* current state still has to be D_FAILED,
1933                         * there is only one way out: to D_DISKLESS,
1934                         * and that may only happen after our put_ldev below. */
1935                        if (device->state.disk != D_FAILED)
1936                                drbd_err(device,
1937                                        "ASSERT FAILED: disk is %s during detach\n",
1938                                        drbd_disk_str(device->state.disk));
1939
1940                        if (ns.conn >= C_CONNECTED)
1941                                drbd_send_state(peer_device, ns);
1942
1943                        drbd_rs_cancel_all(device);
1944
1945                        /* In case we want to get something to stable storage still,
1946                         * this may be the last chance.
1947                         * Following put_ldev may transition to D_DISKLESS. */
1948                        drbd_md_sync(device);
1949                }
1950                put_ldev(device);
1951        }
1952
1953        /* second half of local IO error, failure to attach,
1954         * or administrative detach,
1955         * after local_cnt references have reached zero again */
1956        if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1957                /* We must still be diskless,
1958                 * re-attach has to be serialized with this! */
1959                if (device->state.disk != D_DISKLESS)
1960                        drbd_err(device,
1961                                 "ASSERT FAILED: disk is %s while going diskless\n",
1962                                 drbd_disk_str(device->state.disk));
1963
1964                if (ns.conn >= C_CONNECTED)
1965                        drbd_send_state(peer_device, ns);
1966                /* corresponding get_ldev in __drbd_set_state
1967                 * this may finally trigger drbd_ldev_destroy. */
1968                put_ldev(device);
1969        }
1970
1971        /* Notify peer that I had a local IO error and did not detach. */
1972        if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
1973                drbd_send_state(peer_device, ns);
1974
1975        /* Disks got bigger while they were detached */
1976        if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1977            test_and_clear_bit(RESYNC_AFTER_NEG, &device->flags)) {
1978                if (ns.conn == C_CONNECTED)
1979                        resync_after_online_grow(device);
1980        }
1981
1982        /* A resync finished or aborted, wake paused devices... */
1983        if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
1984            (os.peer_isp && !ns.peer_isp) ||
1985            (os.user_isp && !ns.user_isp))
1986                resume_next_sg(device);
1987
1988        /* sync target done with resync.  Explicitly notify peer, even though
1989         * it should (at least for non-empty resyncs) already know itself. */
1990        if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1991                drbd_send_state(peer_device, ns);
1992
1993        /* Verify finished, or reached stop sector.  Peer did not know about
1994         * the stop sector, and we may even have changed the stop sector during
1995         * verify to interrupt/stop early.  Send the new state. */
1996        if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
1997        && verify_can_do_stop_sector(device))
1998                drbd_send_state(peer_device, ns);
1999
2000        /* This triggers bitmap writeout of potentially still unwritten pages
2001         * if the resync finished cleanly, or aborted because of peer disk
2002         * failure, or on transition from resync back to AHEAD/BEHIND.
2003         *
2004         * Connection loss is handled in drbd_disconnected() by the receiver.
2005         *
2006         * For resync aborted because of local disk failure, we cannot do
2007         * any bitmap writeout anymore.
2008         *
2009         * No harm done if some bits change during this phase.
2010         */
2011        if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
2012            (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) {
2013                drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL,
2014                        "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
2015                put_ldev(device);
2016        }
2017
2018        if (ns.disk == D_DISKLESS &&
2019            ns.conn == C_STANDALONE &&
2020            ns.role == R_SECONDARY) {
2021                if (os.aftr_isp != ns.aftr_isp)
2022                        resume_next_sg(device);
2023        }
2024
2025        drbd_md_sync(device);
2026}
2027
2028struct after_conn_state_chg_work {
2029        struct drbd_work w;
2030        enum drbd_conns oc;
2031        union drbd_state ns_min;
2032        union drbd_state ns_max; /* new, max state, over all devices */
2033        enum chg_state_flags flags;
2034        struct drbd_connection *connection;
2035        struct drbd_state_change *state_change;
2036};
2037
2038static int w_after_conn_state_ch(struct drbd_work *w, int unused)
2039{
2040        struct after_conn_state_chg_work *acscw =
2041                container_of(w, struct after_conn_state_chg_work, w);
2042        struct drbd_connection *connection = acscw->connection;
2043        enum drbd_conns oc = acscw->oc;
2044        union drbd_state ns_max = acscw->ns_max;
2045        struct drbd_peer_device *peer_device;
2046        int vnr;
2047
2048        broadcast_state_change(acscw->state_change);
2049        forget_state_change(acscw->state_change);
2050        kfree(acscw);
2051
2052        /* Upon network configuration, we need to start the receiver */
2053        if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
2054                drbd_thread_start(&connection->receiver);
2055
2056        if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
2057                struct net_conf *old_conf;
2058
2059                mutex_lock(&notification_mutex);
2060                idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2061                        notify_peer_device_state(NULL, 0, peer_device, NULL,
2062                                                 NOTIFY_DESTROY | NOTIFY_CONTINUES);
2063                notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY);
2064                mutex_unlock(&notification_mutex);
2065
2066                mutex_lock(&connection->resource->conf_update);
2067                old_conf = connection->net_conf;
2068                connection->my_addr_len = 0;
2069                connection->peer_addr_len = 0;
2070                RCU_INIT_POINTER(connection->net_conf, NULL);
2071                conn_free_crypto(connection);
2072                mutex_unlock(&connection->resource->conf_update);
2073
2074                synchronize_rcu();
2075                kfree(old_conf);
2076        }
2077
2078        if (ns_max.susp_fen) {
2079                /* case1: The outdate peer handler is successful: */
2080                if (ns_max.pdsk <= D_OUTDATED) {
2081                        rcu_read_lock();
2082                        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2083                                struct drbd_device *device = peer_device->device;
2084                                if (test_bit(NEW_CUR_UUID, &device->flags)) {
2085                                        drbd_uuid_new_current(device);
2086                                        clear_bit(NEW_CUR_UUID, &device->flags);
2087                                }
2088                        }
2089                        rcu_read_unlock();
2090                        spin_lock_irq(&connection->resource->req_lock);
2091                        _tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
2092                        _conn_request_state(connection,
2093                                            (union drbd_state) { { .susp_fen = 1 } },
2094                                            (union drbd_state) { { .susp_fen = 0 } },
2095                                            CS_VERBOSE);
2096                        spin_unlock_irq(&connection->resource->req_lock);
2097                }
2098        }
2099        conn_md_sync(connection);
2100        kref_put(&connection->kref, drbd_destroy_connection);
2101
2102        return 0;
2103}
2104
2105static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
2106{
2107        enum chg_state_flags flags = ~0;
2108        struct drbd_peer_device *peer_device;
2109        int vnr, first_vol = 1;
2110        union drbd_dev_state os, cs = {
2111                { .role = R_SECONDARY,
2112                  .peer = R_UNKNOWN,
2113                  .conn = connection->cstate,
2114                  .disk = D_DISKLESS,
2115                  .pdsk = D_UNKNOWN,
2116                } };
2117
2118        rcu_read_lock();
2119        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2120                struct drbd_device *device = peer_device->device;
2121                os = device->state;
2122
2123                if (first_vol) {
2124                        cs = os;
2125                        first_vol = 0;
2126                        continue;
2127                }
2128
2129                if (cs.role != os.role)
2130                        flags &= ~CS_DC_ROLE;
2131
2132                if (cs.peer != os.peer)
2133                        flags &= ~CS_DC_PEER;
2134
2135                if (cs.conn != os.conn)
2136                        flags &= ~CS_DC_CONN;
2137
2138                if (cs.disk != os.disk)
2139                        flags &= ~CS_DC_DISK;
2140
2141                if (cs.pdsk != os.pdsk)
2142                        flags &= ~CS_DC_PDSK;
2143        }
2144        rcu_read_unlock();
2145
2146        *pf |= CS_DC_MASK;
2147        *pf &= flags;
2148        (*pcs).i = cs.i;
2149}
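/*
 * Illustrative example: with two volumes in Secondary/Primary and
 * Secondary/Secondary, the peer roles differ across volumes, so
 * CS_DC_PEER is cleared from *pf; the local role (Secondary on both) is
 * common, so CS_DC_ROLE stays set.  *pcs carries the state of the first
 * volume as the common state.
 */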
2150
2151static enum drbd_state_rv
2152conn_is_valid_transition(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
2153                         enum chg_state_flags flags)
2154{
2155        enum drbd_state_rv rv = SS_SUCCESS;
2156        union drbd_state ns, os;
2157        struct drbd_peer_device *peer_device;
2158        int vnr;
2159
2160        rcu_read_lock();
2161        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2162                struct drbd_device *device = peer_device->device;
2163                os = drbd_read_state(device);
2164                ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
2165
2166                if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
2167                        ns.disk = os.disk;
2168
2169                if (ns.i == os.i)
2170                        continue;
2171
2172                rv = is_valid_transition(os, ns);
2173
2174                if (rv >= SS_SUCCESS && !(flags & CS_HARD)) {
2175                        rv = is_valid_state(device, ns);
2176                        if (rv < SS_SUCCESS) {
2177                                if (is_valid_state(device, os) == rv)
2178                                        rv = is_valid_soft_transition(os, ns, connection);
2179                        } else
2180                                rv = is_valid_soft_transition(os, ns, connection);
2181                }
2182
2183                if (rv < SS_SUCCESS) {
2184                        if (flags & CS_VERBOSE)
2185                                print_st_err(device, os, ns, rv);
2186                        break;
2187                }
2188        }
2189        rcu_read_unlock();
2190
2191        return rv;
2192}
2193
2194static void
2195conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
2196               union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
2197{
2198        union drbd_state ns, os, ns_max = { };
2199        union drbd_state ns_min = {
2200                { .role = R_MASK,
2201                  .peer = R_MASK,
2202                  .conn = val.conn,
2203                  .disk = D_MASK,
2204                  .pdsk = D_MASK
2205                } };
2206        struct drbd_peer_device *peer_device;
2207        enum drbd_state_rv rv;
2208        int vnr, number_of_volumes = 0;
2209
2210        if (mask.conn == C_MASK) {
2211                /* remember last connect time so request_timer_fn() won't
2212                 * kill newly established sessions while we are still trying to thaw
2213                 * previously frozen IO */
2214                if (connection->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
2215                        connection->last_reconnect_jif = jiffies;
2216
2217                connection->cstate = val.conn;
2218        }
2219
2220        rcu_read_lock();
2221        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2222                struct drbd_device *device = peer_device->device;
2223                number_of_volumes++;
2224                os = drbd_read_state(device);
2225                ns = apply_mask_val(os, mask, val);
2226                ns = sanitize_state(device, os, ns, NULL);
2227
2228                if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
2229                        ns.disk = os.disk;
2230
2231                rv = _drbd_set_state(device, ns, flags, NULL);
2232                BUG_ON(rv < SS_SUCCESS);
2233                ns.i = device->state.i;
2234                ns_max.role = max_role(ns.role, ns_max.role);
2235                ns_max.peer = max_role(ns.peer, ns_max.peer);
2236                ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
2237                ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
2238                ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);
2239
2240                ns_min.role = min_role(ns.role, ns_min.role);
2241                ns_min.peer = min_role(ns.peer, ns_min.peer);
2242                ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
2243                ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
2244                ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
2245        }
2246        rcu_read_unlock();
2247
2248        if (number_of_volumes == 0) {
2249                ns_min = ns_max = (union drbd_state) { {
2250                                .role = R_SECONDARY,
2251                                .peer = R_UNKNOWN,
2252                                .conn = val.conn,
2253                                .disk = D_DISKLESS,
2254                                .pdsk = D_UNKNOWN
2255                        } };
2256        }
2257
2258        ns_min.susp = ns_max.susp = connection->resource->susp;
2259        ns_min.susp_nod = ns_max.susp_nod = connection->resource->susp_nod;
2260        ns_min.susp_fen = ns_max.susp_fen = connection->resource->susp_fen;
2261
2262        *pns_min = ns_min;
2263        *pns_max = ns_max;
2264}
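/*
 * Illustrative note: ns_min/ns_max are the field-wise minimum and maximum
 * over all volumes.  E.g. if two volumes end up with disks D_UP_TO_DATE
 * and D_INCONSISTENT, then ns_max.disk == D_UP_TO_DATE and
 * ns_min.disk == D_INCONSISTENT.  With no volumes at all, both fall back
 * to the Secondary/Unknown/Diskless defaults above.
 */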
2265
2266static enum drbd_state_rv
2267_conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
2268{
2269        enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */
2270
2271        if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags))
2272                rv = SS_CW_SUCCESS;
2273
2274        if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags))
2275                rv = SS_CW_FAILED_BY_PEER;
2276
2277        err = conn_is_valid_transition(connection, mask, val, 0);
2278        if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
2279                return rv;
2280
2281        return err;
2282}
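/*
 * Note on the waiting protocol (assuming, as the "continue waiting"
 * comment implies, that SS_UNKNOWN_ERROR evaluates as false):
 * _conn_request_state() waits in wait_event_lock_irq() on this condition,
 * so returning SS_UNKNOWN_ERROR keeps it waiting.  The peer's answer
 * arrives via the CONN_WD_ST_CHG_OKAY/CONN_WD_ST_CHG_FAIL bits; any error
 * from conn_is_valid_transition(), or leaving C_WF_REPORT_PARAMS, ends
 * the wait early.
 */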
2283
2284enum drbd_state_rv
2285_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
2286                    enum chg_state_flags flags)
2287{
2288        enum drbd_state_rv rv = SS_SUCCESS;
2289        struct after_conn_state_chg_work *acscw;
2290        enum drbd_conns oc = connection->cstate;
2291        union drbd_state ns_max, ns_min, os;
2292        bool have_mutex = false;
2293        struct drbd_state_change *state_change;
2294
2295        if (mask.conn) {
2296                rv = is_valid_conn_transition(oc, val.conn);
2297                if (rv < SS_SUCCESS)
2298                        goto abort;
2299        }
2300
2301        rv = conn_is_valid_transition(connection, mask, val, flags);
2302        if (rv < SS_SUCCESS)
2303                goto abort;
2304
2305        if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
2306            !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
2307
2308                /* This will be a cluster-wide state change.
2309                 * Need to give up the spinlock, grab the mutex,
2310                 * then send the state change request, ... */
2311                spin_unlock_irq(&connection->resource->req_lock);
2312                mutex_lock(&connection->cstate_mutex);
2313                have_mutex = true;
2314
2315                set_bit(CONN_WD_ST_CHG_REQ, &connection->flags);
2316                if (conn_send_state_req(connection, mask, val)) {
2317                        /* sending failed. */
2318                        clear_bit(CONN_WD_ST_CHG_REQ, &connection->flags);
2319                        rv = SS_CW_FAILED_BY_PEER;
2320                        /* need to re-acquire the spin lock, though */
2321                        goto abort_unlocked;
2322                }
2323
2324                if (val.conn == C_DISCONNECTING)
2325                        set_bit(DISCONNECT_SENT, &connection->flags);
2326
2327                /* ... and re-acquire the spinlock.
2328                 * If _conn_rq_cond() returned >= SS_SUCCESS, we must call
2329                 * conn_set_state() within the same spinlock. */
2330                spin_lock_irq(&connection->resource->req_lock);
2331                wait_event_lock_irq(connection->ping_wait,
2332                                (rv = _conn_rq_cond(connection, mask, val)),
2333                                connection->resource->req_lock);
2334                clear_bit(CONN_WD_ST_CHG_REQ, &connection->flags);
2335                if (rv < SS_SUCCESS)
2336                        goto abort;
2337        }
2338
2339        state_change = remember_old_state(connection->resource, GFP_ATOMIC);
2340        conn_old_common_state(connection, &os, &flags);
2341        flags |= CS_DC_SUSP;
2342        conn_set_state(connection, mask, val, &ns_min, &ns_max, flags);
2343        conn_pr_state_change(connection, os, ns_max, flags);
2344        remember_new_state(state_change);
2345
2346        acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
2347        if (acscw) {
2348                acscw->oc = os.conn;
2349                acscw->ns_min = ns_min;
2350                acscw->ns_max = ns_max;
2351                acscw->flags = flags;
2352                acscw->w.cb = w_after_conn_state_ch;
2353                kref_get(&connection->kref);
2354                acscw->connection = connection;
2355                acscw->state_change = state_change;
2356                drbd_queue_work(&connection->sender_work, &acscw->w);
2357        } else {
2358                drbd_err(connection, "Could not kmalloc an acscw\n");
2359        }
2360
2361 abort:
2362        if (have_mutex) {
2363                /* mutex_unlock() "... must not be used in interrupt context.",
2364                 * so give up the spinlock, then re-acquire it */
2365                spin_unlock_irq(&connection->resource->req_lock);
2366 abort_unlocked:
2367                mutex_unlock(&connection->cstate_mutex);
2368                spin_lock_irq(&connection->resource->req_lock);
2369        }
2370        if (rv < SS_SUCCESS && flags & CS_VERBOSE) {
2371                drbd_err(connection, "State change failed: %s\n", drbd_set_st_err_str(rv));
2372                drbd_err(connection, " mask = 0x%x val = 0x%x\n", mask.i, val.i);
2373                drbd_err(connection, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn));
2374        }
2375        return rv;
2376}
2377
2378enum drbd_state_rv
2379conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
2380                   enum chg_state_flags flags)
2381{
2382        enum drbd_state_rv rv;
2383
2384        spin_lock_irq(&connection->resource->req_lock);
2385        rv = _conn_request_state(connection, mask, val, flags);
2386        spin_unlock_irq(&connection->resource->req_lock);
2387
2388        return rv;
2389}
2390