linux/drivers/block/drbd/drbd_nl.c
/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>

/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);

#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
#include <linux/genl_magic_func.h>

/* used with blkdev_get_by_path() to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
        genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
        if (genlmsg_reply(skb, info))
                pr_err("error sending genl reply\n");
}

/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
 * reason it could fail would be lack of space in the skb, and 4k are available. */
int drbd_msg_put_info(struct sk_buff *skb, const char *info)
{
        struct nlattr *nla;
        int err = -EMSGSIZE;

        if (!info || !info[0])
                return 0;

        nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
        if (!nla)
                return err;

        err = nla_put_string(skb, T_info_text, info);
        if (err) {
                nla_nest_cancel(skb, nla);
                return err;
        } else
                nla_nest_end(skb, nla);
        return 0;
}
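
/*
 * Illustrative usage (the pattern used throughout this file): a handler
 * attaches a short explanation to the prepared reply before returning a
 * DRBD error code, e.g.
 *
 *	drbd_msg_put_info(adm_ctx.reply_skb, "unknown minor");
 *	return ERR_MINOR_INVALID;
 *
 * as done in drbd_adm_prepare() below.
 */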

/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 *
 * At this point, we still rely on the global genl_lock().
 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
 * to add additional synchronization against object destruction/modification.
 */
#define DRBD_ADM_NEED_MINOR     1
#define DRBD_ADM_NEED_RESOURCE  2
#define DRBD_ADM_NEED_CONNECTION 4
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
        struct sk_buff *skb, struct genl_info *info, unsigned flags)
{
        struct drbd_genlmsghdr *d_in = info->userhdr;
        const u8 cmd = info->genlhdr->cmd;
        int err;

        memset(adm_ctx, 0, sizeof(*adm_ctx));

        /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
        if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
                return -EPERM;

        adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!adm_ctx->reply_skb) {
                err = -ENOMEM;
                goto fail;
        }

        adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
                                        info, &drbd_genl_family, 0, cmd);
        /* a put of a few bytes into a fresh skb of >= 4k will always succeed,
         * but check anyway */
        if (!adm_ctx->reply_dh) {
                err = -ENOMEM;
                goto fail;
        }

        adm_ctx->reply_dh->minor = d_in->minor;
        adm_ctx->reply_dh->ret_code = NO_ERROR;

        adm_ctx->volume = VOLUME_UNSPECIFIED;
        if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
                struct nlattr *nla;
                /* parse and validate only */
                err = drbd_cfg_context_from_attrs(NULL, info);
                if (err)
                        goto fail;

                /* It was present, and valid,
                 * copy it over to the reply skb. */
                err = nla_put_nohdr(adm_ctx->reply_skb,
                                info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
                                info->attrs[DRBD_NLA_CFG_CONTEXT]);
                if (err)
                        goto fail;

                /* and assign stuff to the adm_ctx */
                nla = nested_attr_tb[__nla_type(T_ctx_volume)];
                if (nla)
                        adm_ctx->volume = nla_get_u32(nla);
                nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
                if (nla)
                        adm_ctx->resource_name = nla_data(nla);
                adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
                adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
                if ((adm_ctx->my_addr &&
                     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
                    (adm_ctx->peer_addr &&
                     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
                        err = -EINVAL;
                        goto fail;
                }
        }

        adm_ctx->minor = d_in->minor;
        adm_ctx->device = minor_to_device(d_in->minor);

        /* We are protected by the global genl_lock().
         * But we may explicitly drop it/retake it in drbd_adm_set_role(),
         * so make sure this object stays around. */
        if (adm_ctx->device)
                kref_get(&adm_ctx->device->kref);

        if (adm_ctx->resource_name) {
                adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
        }

        if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
                drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
                return ERR_MINOR_INVALID;
        }
        if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
                drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
                if (adm_ctx->resource_name)
                        return ERR_RES_NOT_KNOWN;
                return ERR_INVALID_REQUEST;
        }

        if (flags & DRBD_ADM_NEED_CONNECTION) {
                if (adm_ctx->resource) {
                        drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
                        return ERR_INVALID_REQUEST;
                }
                if (adm_ctx->device) {
                        drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
                        return ERR_INVALID_REQUEST;
                }
                if (adm_ctx->my_addr && adm_ctx->peer_addr)
                        adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
                                                          nla_len(adm_ctx->my_addr),
                                                          nla_data(adm_ctx->peer_addr),
                                                          nla_len(adm_ctx->peer_addr));
                if (!adm_ctx->connection) {
                        drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
                        return ERR_INVALID_REQUEST;
                }
        }

        /* some more paranoia, if the request was over-determined */
        if (adm_ctx->device && adm_ctx->resource &&
            adm_ctx->device->resource != adm_ctx->resource) {
                pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
                                adm_ctx->minor, adm_ctx->resource->name,
                                adm_ctx->device->resource->name);
                drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
                return ERR_INVALID_REQUEST;
        }
        if (adm_ctx->device &&
            adm_ctx->volume != VOLUME_UNSPECIFIED &&
            adm_ctx->volume != adm_ctx->device->vnr) {
                pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
                                adm_ctx->minor, adm_ctx->volume,
                                adm_ctx->device->vnr,
                                adm_ctx->device->resource->name);
                drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
                return ERR_INVALID_REQUEST;
        }

        /* still, provide adm_ctx->resource always, if possible. */
        if (!adm_ctx->resource) {
                adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
                        : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
                if (adm_ctx->resource)
                        kref_get(&adm_ctx->resource->kref);
        }

        return NO_ERROR;

fail:
        nlmsg_free(adm_ctx->reply_skb);
        adm_ctx->reply_skb = NULL;
        return err;
}

static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
        struct genl_info *info, int retcode)
{
        if (adm_ctx->device) {
                kref_put(&adm_ctx->device->kref, drbd_destroy_device);
                adm_ctx->device = NULL;
        }
        if (adm_ctx->connection) {
                kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
                adm_ctx->connection = NULL;
        }
        if (adm_ctx->resource) {
                kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
                adm_ctx->resource = NULL;
        }

        if (!adm_ctx->reply_skb)
                return -ENOMEM;

        adm_ctx->reply_dh->ret_code = retcode;
        drbd_adm_send_reply(adm_ctx->reply_skb, info);
        return 0;
}
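
/*
 * Sketch of the canonical .doit handler built from the two helpers above;
 * illustrative only (error handling trimmed, "drbd_adm_example" is a
 * made-up name), modeled on drbd_adm_set_role() further down:
 *
 *	int drbd_adm_example(struct sk_buff *skb, struct genl_info *info)
 *	{
 *		struct drbd_config_context adm_ctx;
 *		enum drbd_ret_code retcode;
 *
 *		retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *		if (!adm_ctx.reply_skb)
 *			return retcode;
 *		if (retcode == NO_ERROR) {
 *			// ... act on adm_ctx.device ...
 *		}
 *		drbd_adm_finish(&adm_ctx, info, retcode);
 *		return 0;
 *	}
 */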

static void setup_khelper_env(struct drbd_connection *connection, char **envp)
{
        char *afs;

        /* FIXME: A future version will not allow this case. */
        if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
                return;

        switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
        case AF_INET6:
                afs = "ipv6";
                snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
                         &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
                break;
        case AF_INET:
                afs = "ipv4";
                snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
                         &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
                break;
        default:
                afs = "ssocks";
                snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
                         &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
        }
        snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
}
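
/*
 * For an IPv4 peer at e.g. 192.0.2.1 (an example address), the function
 * above fills the two scratch buffers (envp[3] and envp[4] in the callers
 * below) so the helper sees:
 *
 *	DRBD_PEER_AF=ipv4
 *	DRBD_PEER_ADDRESS=192.0.2.1
 */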

int drbd_khelper(struct drbd_device *device, char *cmd)
{
        char *envp[] = { "HOME=/",
                        "TERM=linux",
                        "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                         (char[20]) { }, /* address family */
                         (char[60]) { }, /* address */
                        NULL };
        char mb[12];
        char *argv[] = {usermode_helper, cmd, mb, NULL };
        struct drbd_connection *connection = first_peer_device(device)->connection;
        struct sib_info sib;
        int ret;

        if (current == connection->worker.task)
                set_bit(CALLBACK_PENDING, &connection->flags);

        snprintf(mb, 12, "minor-%d", device_to_minor(device));
        setup_khelper_env(connection, envp);

        /* The helper may take some time;
         * write out any unsynced meta data changes now. */
        drbd_md_sync(device);

        drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
        sib.sib_reason = SIB_HELPER_PRE;
        sib.helper_name = cmd;
        drbd_bcast_event(device, &sib);
        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        if (ret)
                drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
                                usermode_helper, cmd, mb,
                                (ret >> 8) & 0xff, ret);
        else
                drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
                                usermode_helper, cmd, mb,
                                (ret >> 8) & 0xff, ret);
        sib.sib_reason = SIB_HELPER_POST;
        sib.helper_exit_code = ret;
        drbd_bcast_event(device, &sib);

        if (current == connection->worker.task)
                clear_bit(CALLBACK_PENDING, &connection->flags);

        if (ret < 0) /* Ignore any ERRNOs we got. */
                ret = 0;

        return ret;
}
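
/*
 * Usage sketch: callers decode the helper's exit status exactly as
 * conn_try_outdate_peer() does below; the handler name here is
 * illustrative only:
 *
 *	int r = drbd_khelper(device, "pri-on-incon-degr");
 *	switch ((r >> 8) & 0xff) {
 *	...
 *	}
 */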

static int conn_khelper(struct drbd_connection *connection, char *cmd)
{
        char *envp[] = { "HOME=/",
                        "TERM=linux",
                        "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                         (char[20]) { }, /* address family */
                         (char[60]) { }, /* address */
                        NULL };
        char *resource_name = connection->resource->name;
        char *argv[] = {usermode_helper, cmd, resource_name, NULL };
        int ret;

        setup_khelper_env(connection, envp);
        conn_md_sync(connection);

        drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
        /* TODO: conn_bcast_event() ?? */

        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        if (ret)
                drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
                          usermode_helper, cmd, resource_name,
                          (ret >> 8) & 0xff, ret);
        else
                drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
                          usermode_helper, cmd, resource_name,
                          (ret >> 8) & 0xff, ret);
        /* TODO: conn_bcast_event() ?? */

        if (ret < 0) /* Ignore any ERRNOs we got. */
                ret = 0;

        return ret;
}

static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
{
        enum drbd_fencing_p fp = FP_NOT_AVAIL;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                if (get_ldev_if_state(device, D_CONSISTENT)) {
                        struct disk_conf *disk_conf =
                                rcu_dereference(peer_device->device->ldev->disk_conf);
                        fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
                        put_ldev(device);
                }
        }
        rcu_read_unlock();

        if (fp == FP_NOT_AVAIL) {
                /* IO Suspending works on the whole resource.
                   Do it only for one device. */
                vnr = 0;
                peer_device = idr_get_next(&connection->peer_devices, &vnr);
                drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
        }

        return fp;
}

bool conn_try_outdate_peer(struct drbd_connection *connection)
{
        unsigned int connect_cnt;
        union drbd_state mask = { };
        union drbd_state val = { };
        enum drbd_fencing_p fp;
        char *ex_to_string;
        int r;

        spin_lock_irq(&connection->resource->req_lock);
        if (connection->cstate >= C_WF_REPORT_PARAMS) {
                drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
                spin_unlock_irq(&connection->resource->req_lock);
                return false;
        }

        connect_cnt = connection->connect_cnt;
        spin_unlock_irq(&connection->resource->req_lock);

        fp = highest_fencing_policy(connection);
        switch (fp) {
        case FP_NOT_AVAIL:
                drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
                goto out;
        case FP_DONT_CARE:
                return true;
        default: ;
        }

        r = conn_khelper(connection, "fence-peer");

        switch ((r>>8) & 0xff) {
        case 3: /* peer is inconsistent */
                ex_to_string = "peer is inconsistent or worse";
                mask.pdsk = D_MASK;
                val.pdsk = D_INCONSISTENT;
                break;
        case 4: /* peer got outdated, or was already outdated */
                ex_to_string = "peer was fenced";
                mask.pdsk = D_MASK;
                val.pdsk = D_OUTDATED;
                break;
        case 5: /* peer was down */
                if (conn_highest_disk(connection) == D_UP_TO_DATE) {
                        /* we will(have) create(d) a new UUID anyway... */
                        ex_to_string = "peer is unreachable, assumed to be dead";
                        mask.pdsk = D_MASK;
                        val.pdsk = D_OUTDATED;
                } else {
                        ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
                }
                break;
        case 6: /* Peer is primary, voluntarily outdate myself.
                 * This is useful when an unconnected R_SECONDARY is asked to
                 * become R_PRIMARY, but finds the other peer being active. */
                ex_to_string = "peer is active";
                drbd_warn(connection, "Peer is primary, outdating myself.\n");
                mask.disk = D_MASK;
                val.disk = D_OUTDATED;
                break;
        case 7:
                if (fp != FP_STONITH)
                        drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
                ex_to_string = "peer was stonithed";
                mask.pdsk = D_MASK;
                val.pdsk = D_OUTDATED;
                break;
        default:
                /* The script is broken ... */
                drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
                return false; /* Eventually leave IO frozen */
        }

        drbd_info(connection, "fence-peer helper returned %d (%s)\n",
                  (r>>8) & 0xff, ex_to_string);

 out:

        /* Not using
           conn_request_state(connection, mask, val, CS_VERBOSE);
           here, because we might have been able to re-establish the
           connection in the meantime. */
        spin_lock_irq(&connection->resource->req_lock);
        if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
                if (connection->connect_cnt != connect_cnt)
                        /* In case the connection was established and dropped
                           while the fence-peer handler was running, ignore it */
                        drbd_info(connection, "Ignoring fence-peer exit code\n");
                else
                        _conn_request_state(connection, mask, val, CS_VERBOSE);
        }
        spin_unlock_irq(&connection->resource->req_lock);

        return conn_highest_pdsk(connection) <= D_OUTDATED;
}

static int _try_outdate_peer_async(void *data)
{
        struct drbd_connection *connection = (struct drbd_connection *)data;

        conn_try_outdate_peer(connection);

        kref_put(&connection->kref, drbd_destroy_connection);
        return 0;
}

void conn_try_outdate_peer_async(struct drbd_connection *connection)
{
        struct task_struct *opa;

        kref_get(&connection->kref);
        /* We may just have force_sig()'ed this thread
         * to get it out of some blocking network function.
         * Clear signals; otherwise kthread_run(), which internally uses
         * wait_on_completion_killable(), will mistake our pending signal
         * for a new fatal signal and fail. */
        flush_signals(current);
        opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
        if (IS_ERR(opa)) {
                drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
                kref_put(&connection->kref, drbd_destroy_connection);
        }
}

enum drbd_state_rv
drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
{
        struct drbd_peer_device *const peer_device = first_peer_device(device);
        struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
        const int max_tries = 4;
        enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
        struct net_conf *nc;
        int try = 0;
        int forced = 0;
        union drbd_state mask, val;

        if (new_role == R_PRIMARY) {
                struct drbd_connection *connection;

                /* Detect dead peers as soon as possible.  */

                rcu_read_lock();
                for_each_connection(connection, device->resource)
                        request_ping(connection);
                rcu_read_unlock();
        }

        mutex_lock(device->state_mutex);

        mask.i = 0; mask.role = R_MASK;
        val.i  = 0; val.role  = new_role;

        while (try++ < max_tries) {
                rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);

                /* in case we first succeeded to outdate,
                 * but now suddenly could establish a connection */
                if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
                        val.pdsk = 0;
                        mask.pdsk = 0;
                        continue;
                }

                if (rv == SS_NO_UP_TO_DATE_DISK && force &&
                    (device->state.disk < D_UP_TO_DATE &&
                     device->state.disk >= D_INCONSISTENT)) {
                        mask.disk = D_MASK;
                        val.disk  = D_UP_TO_DATE;
                        forced = 1;
                        continue;
                }

                if (rv == SS_NO_UP_TO_DATE_DISK &&
                    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
                        D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

                        if (conn_try_outdate_peer(connection)) {
                                val.disk = D_UP_TO_DATE;
                                mask.disk = D_MASK;
                        }
                        continue;
                }

                if (rv == SS_NOTHING_TO_DO)
                        goto out;
                if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
                        if (!conn_try_outdate_peer(connection) && force) {
                                drbd_warn(device, "Forced into split brain situation!\n");
                                mask.pdsk = D_MASK;
                                val.pdsk  = D_OUTDATED;

                        }
                        continue;
                }
                if (rv == SS_TWO_PRIMARIES) {
                        /* Maybe the peer is detected as dead very soon...
                           retry at most once more in this case. */
                        int timeo;
                        rcu_read_lock();
                        nc = rcu_dereference(connection->net_conf);
                        timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
                        rcu_read_unlock();
                        schedule_timeout_interruptible(timeo);
                        if (try < max_tries)
                                try = max_tries - 1;
                        continue;
                }
                if (rv < SS_SUCCESS) {
                        rv = _drbd_request_state(device, mask, val,
                                                CS_VERBOSE + CS_WAIT_COMPLETE);
                        if (rv < SS_SUCCESS)
                                goto out;
                }
                break;
        }

        if (rv < SS_SUCCESS)
                goto out;

        if (forced)
                drbd_warn(device, "Forced to consider local data as UpToDate!\n");

        /* Wait until nothing is on the fly :) */
        wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

        /* FIXME also wait for all pending P_BARRIER_ACK? */

        if (new_role == R_SECONDARY) {
                if (get_ldev(device)) {
                        device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
                        put_ldev(device);
                }
        } else {
                mutex_lock(&device->resource->conf_update);
                nc = connection->net_conf;
                if (nc)
                        nc->discard_my_data = 0; /* without copy; single bit op is atomic */
                mutex_unlock(&device->resource->conf_update);

                if (get_ldev(device)) {
                        if (((device->state.conn < C_CONNECTED ||
                               device->state.pdsk <= D_FAILED)
                              && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
                                drbd_uuid_new_current(device);

                        device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
                        put_ldev(device);
                }
        }

        /* writeout of activity-log-covered areas of the bitmap
         * to stable storage is already done in the after-state-change work */

        if (device->state.conn >= C_WF_REPORT_PARAMS) {
                /* if this was forced, we should consider sync */
                if (forced)
                        drbd_send_uuids(peer_device);
                drbd_send_current_state(peer_device);
        }

        drbd_md_sync(device);
        set_disk_ro(device->vdisk, new_role == R_SECONDARY);
        kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
        mutex_unlock(device->state_mutex);
        return rv;
}

static const char *from_attrs_err_to_txt(int err)
{
        return  err == -ENOMSG ? "required attribute missing" :
                err == -EOPNOTSUPP ? "unknown mandatory attribute" :
                err == -EEXIST ? "can not change invariant setting" :
                "invalid attribute value";
}

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
        struct drbd_config_context adm_ctx;
        struct set_role_parms parms;
        int err;
        enum drbd_ret_code retcode;

        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
                return retcode;
        if (retcode != NO_ERROR)
                goto out;

        memset(&parms, 0, sizeof(parms));
        if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
                err = set_role_parms_from_attrs(&parms, info);
                if (err) {
                        retcode = ERR_MANDATORY_TAG;
                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
                        goto out;
                }
        }
        genl_unlock();
        mutex_lock(&adm_ctx.resource->adm_mutex);

        if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
                retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
        else
                retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);

        mutex_unlock(&adm_ctx.resource->adm_mutex);
        genl_lock();
out:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
}

/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
 *
 * We currently have two possible layouts:
 * external:
 *   |----------- md_size_sect ------------------|
 *   [ 4k superblock ][ activity log ][  Bitmap  ]
 *   | al_offset == 8 |
 *   | bm_offset = al_offset + X      |
 *  ==> bitmap sectors = md_size_sect - bm_offset
 *
 * internal:
 *            |----------- md_size_sect ------------------|
 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 *                        | al_offset < 0 |
 *            | bm_offset = al_offset - Y |
 *  ==> bitmap sectors = Y = al_offset - bm_offset
 *
 *  Activity log size used to be fixed 32kB,
 *  but is about to become configurable.
 */
static void drbd_md_set_sector_offsets(struct drbd_device *device,
                                       struct drbd_backing_dev *bdev)
{
        sector_t md_size_sect = 0;
        unsigned int al_size_sect = bdev->md.al_size_4k * 8;

        bdev->md.md_offset = drbd_md_ss(bdev);

        switch (bdev->md.meta_dev_idx) {
        default:
                /* v07 style fixed size indexed meta data */
                bdev->md.md_size_sect = MD_128MB_SECT;
                bdev->md.al_offset = MD_4kB_SECT;
                bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
                break;
        case DRBD_MD_INDEX_FLEX_EXT:
                /* just occupy the full device; unit: sectors */
                bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
                bdev->md.al_offset = MD_4kB_SECT;
                bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
                break;
        case DRBD_MD_INDEX_INTERNAL:
        case DRBD_MD_INDEX_FLEX_INT:
                /* al size is still fixed */
                bdev->md.al_offset = -al_size_sect;
                /* we need (slightly less than) ~ this many bitmap sectors: */
                md_size_sect = drbd_get_capacity(bdev->backing_bdev);
                md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
                md_size_sect = BM_SECT_TO_EXT(md_size_sect);
                md_size_sect = ALIGN(md_size_sect, 8);

                /* plus the "drbd meta data super block",
                 * and the activity log; */
                md_size_sect += MD_4kB_SECT + al_size_sect;

                bdev->md.md_size_sect = md_size_sect;
                /* bitmap offset is adjusted by 'super' block size */
                bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
                break;
        }
}

/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
        /* Needs 9 bytes at max including trailing NUL:
         * -1ULL ==> "16384 EB" */
        static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
        int base = 0;
        while (size >= 10000 && base < sizeof(units)-1) {
                /* shift + round */
                size = (size >> 10) + !!(size & (1<<9));
                base++;
        }
        sprintf(buf, "%u %cB", (unsigned)size, units[base]);

        return buf;
}
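
/*
 * Examples, computed from the loop above (input is in KB):
 *
 *	char buf[10];
 *	ppsize(buf, 4);		// -> "4 KB"
 *	ppsize(buf, 1048576);	// 1 GiB in KB -> "1024 MB"
 */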

/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 *  remote READ does inc_ap_bio, receiver would need to receive answer
 *  packet from remote to dec_ap_bio again.
 *  receiver receive_sizes(), comes here,
 *  waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 *  (not connected, or bad/no disk on peer):
 *  see drbd_fail_request_early, ap_bio_cnt is zero.
 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 *  peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a device flag, is triggered by drbd internals,
 * and should be short-lived. */
void drbd_suspend_io(struct drbd_device *device)
{
        set_bit(SUSPEND_IO, &device->flags);
        if (drbd_suspended(device))
                return;
        wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_device *device)
{
        clear_bit(SUSPEND_IO, &device->flags);
        wake_up(&device->misc_wait);
}
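
/*
 * The two functions above bracket operations that must not race with
 * application IO; drbd_determine_dev_size() below shows the pattern:
 *
 *	drbd_suspend_io(device);
 *	// ... resize / meta data work ...
 *	drbd_resume_io(device);
 */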

/**
 * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
 * @device:     DRBD device.
 *
 * Returns 0 on success, negative return values indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
        sector_t prev_first_sect, prev_size; /* previous meta location */
        sector_t la_size_sect, u_size;
        struct drbd_md *md = &device->ldev->md;
        u32 prev_al_stripe_size_4k;
        u32 prev_al_stripes;
        sector_t size;
        char ppb[10];
        void *buffer;

        int md_moved, la_size_changed;
        enum determine_dev_size rv = DS_UNCHANGED;

        /* race:
         * application request passes inc_ap_bio,
         * but then cannot get an AL-reference.
         * this function later may wait on ap_bio_cnt == 0. -> deadlock.
         *
         * to avoid that:
         * Suspend IO right here.
         * still lock the act_log to not trigger ASSERTs there.
         */
        drbd_suspend_io(device);
        buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
        if (!buffer) {
                drbd_resume_io(device);
                return DS_ERROR;
        }

        /* no wait necessary anymore, actually we could assert that */
        wait_event(device->al_wait, lc_try_lock(device->act_log));

        prev_first_sect = drbd_md_first_sector(device->ldev);
        prev_size = device->ldev->md.md_size_sect;
        la_size_sect = device->ldev->md.la_size_sect;

        if (rs) {
                /* rs is non NULL if we should change the AL layout only */

                prev_al_stripes = md->al_stripes;
                prev_al_stripe_size_4k = md->al_stripe_size_4k;

                md->al_stripes = rs->al_stripes;
                md->al_stripe_size_4k = rs->al_stripe_size / 4;
                md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
        }

        drbd_md_set_sector_offsets(device, device->ldev);

        rcu_read_lock();
        u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
        rcu_read_unlock();
        size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);

        if (size < la_size_sect) {
                if (rs && u_size == 0) {
                        /* Remove "rs &&" later. This check should always be active, but
                           right now the receiver expects the permissive behavior */
                        drbd_warn(device, "Implicit shrink not allowed. "
                                 "Use --size=%llus for explicit shrink.\n",
                                 (unsigned long long)size);
                        rv = DS_ERROR_SHRINK;
                }
                if (u_size > size)
                        rv = DS_ERROR_SPACE_MD;
                if (rv != DS_UNCHANGED)
                        goto err_out;
        }

        if (drbd_get_capacity(device->this_bdev) != size ||
            drbd_bm_capacity(device) != size) {
                int err;
                err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
                if (unlikely(err)) {
                        /* currently there is only one error: ENOMEM! */
                        size = drbd_bm_capacity(device)>>1;
                        if (size == 0) {
                                drbd_err(device, "OUT OF MEMORY! "
                                    "Could not allocate bitmap!\n");
                        } else {
                                drbd_err(device, "BM resizing failed. "
                                    "Leaving size unchanged at size = %lu KB\n",
                                    (unsigned long)size);
                        }
                        rv = DS_ERROR;
                }
                /* racy, see comments above. */
                drbd_set_my_capacity(device, size);
                device->ldev->md.la_size_sect = size;
                drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
                     (unsigned long long)size>>1);
        }
        if (rv <= DS_ERROR)
                goto err_out;

        la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);

        md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
                || prev_size       != device->ldev->md.md_size_sect;

        if (la_size_changed || md_moved || rs) {
                u32 prev_flags;

                /* We do some synchronous IO below, which may take some time.
                 * Clear the timer, to avoid scary "timer expired!" messages,
                 * "Superblock" is written out at least twice below, anyways. */
                del_timer(&device->md_sync_timer);
                drbd_al_shrink(device); /* All extents inactive. */

                prev_flags = md->flags;
                md->flags &= ~MDF_PRIMARY_IND;
                drbd_md_write(device, buffer);

                drbd_info(device, "Writing the whole bitmap, %s\n",
                         la_size_changed && md_moved ? "size changed and md moved" :
                         la_size_changed ? "size changed" : "md moved");
                /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
                drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
                               "size changed", BM_LOCKED_MASK);
                drbd_initialize_al(device, buffer);

                md->flags = prev_flags;
                drbd_md_write(device, buffer);

                if (rs)
                        drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
                                  md->al_stripes, md->al_stripe_size_4k * 4);
        }

        if (size > la_size_sect)
                rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
        if (size < la_size_sect)
                rv = DS_SHRUNK;

        if (0) {
        err_out:
                if (rs) {
                        md->al_stripes = prev_al_stripes;
                        md->al_stripe_size_4k = prev_al_stripe_size_4k;
                        md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;

                        drbd_md_set_sector_offsets(device, device->ldev);
                }
        }
        lc_unlock(device->act_log);
        wake_up(&device->al_wait);
        drbd_md_put_buffer(device);
        drbd_resume_io(device);

        return rv;
}

sector_t
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
                  sector_t u_size, int assume_peer_has_space)
{
        sector_t p_size = device->p_size;   /* partner's disk size. */
        sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
        sector_t m_size; /* my size */
        sector_t size = 0;

        m_size = drbd_get_max_capacity(bdev);

        if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
                drbd_warn(device, "Resize while not connected was forced by the user!\n");
                p_size = m_size;
        }

        if (p_size && m_size) {
                size = min_t(sector_t, p_size, m_size);
        } else {
                if (la_size_sect) {
                        size = la_size_sect;
                        if (m_size && m_size < size)
                                size = m_size;
                        if (p_size && p_size < size)
                                size = p_size;
                } else {
                        if (m_size)
                                size = m_size;
                        if (p_size)
                                size = p_size;
                }
        }

        if (size == 0)
                drbd_err(device, "Both nodes diskless!\n");

        if (u_size) {
                if (u_size > size)
                        drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
                            (unsigned long)u_size>>1, (unsigned long)size>>1);
                else
                        size = u_size;
        }

        return size;
}
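
/*
 * Worked example for the negotiation above (sector counts illustrative):
 * with p_size = 20971520 (10 GiB) and m_size = 41943040 (20 GiB),
 * size = min(p_size, m_size) = 20971520.  A nonzero user-requested u_size
 * can only select a smaller value; requesting more than the negotiated
 * size is rejected with "Requested disk size is too big".
 */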

/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @device:     DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
{
        struct lru_cache *n, *t;
        struct lc_element *e;
        unsigned int in_use;
        int i;

        if (device->act_log &&
            device->act_log->nr_elements == dc->al_extents)
                return 0;

        in_use = 0;
        t = device->act_log;
        n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
                dc->al_extents, sizeof(struct lc_element), 0);

        if (n == NULL) {
                drbd_err(device, "Cannot allocate act_log lru!\n");
                return -ENOMEM;
        }
        spin_lock_irq(&device->al_lock);
        if (t) {
                for (i = 0; i < t->nr_elements; i++) {
                        e = lc_element_by_index(t, i);
                        if (e->refcnt)
                                drbd_err(device, "refcnt(%d)==%d\n",
                                    e->lc_number, e->refcnt);
                        in_use += e->refcnt;
                }
        }
        if (!in_use)
                device->act_log = n;
        spin_unlock_irq(&device->al_lock);
        if (in_use) {
                drbd_err(device, "Activity log still in use!\n");
                lc_destroy(n);
                return -EBUSY;
        } else {
                if (t)
                        lc_destroy(t);
        }
        drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
        return 0;
}

static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
                                   unsigned int max_bio_size)
{
        struct request_queue * const q = device->rq_queue;
        unsigned int max_hw_sectors = max_bio_size >> 9;
        unsigned int max_segments = 0;
        struct request_queue *b = NULL;

        if (bdev) {
                b = bdev->backing_bdev->bd_disk->queue;

                max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
                rcu_read_lock();
                max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
                rcu_read_unlock();

                blk_set_stacking_limits(&q->limits);
                blk_queue_max_write_same_sectors(q, 0);
        }

        blk_queue_logical_block_size(q, 512);
        blk_queue_max_hw_sectors(q, max_hw_sectors);
        /* This is the workaround for "bio would need to, but cannot, be split" */
        blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
        blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);

        if (b) {
                struct drbd_connection *connection = first_peer_device(device)->connection;

                if (blk_queue_discard(b) &&
                    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
                        /* For now, don't allow more than one activity log extent worth of data
                         * to be discarded in one go. We may need to rework drbd_al_begin_io()
                         * to allow for even larger discard ranges */
                        q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;

                        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
                        /* REALLY? Is stacking secdiscard "legal"? */
                        if (blk_queue_secdiscard(b))
                                queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
                } else {
                        q->limits.max_discard_sectors = 0;
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
                        queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
                }

                blk_queue_stack_limits(q, b);

                if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
                        drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
                                 q->backing_dev_info.ra_pages,
                                 b->backing_dev_info.ra_pages);
                        q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
                }
        }
}

void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
{
        unsigned int now, new, local, peer;

        now = queue_max_hw_sectors(device->rq_queue) << 9;
        local = device->local_max_bio_size; /* possibly last known value, from volatile memory */
        peer = device->peer_max_bio_size; /* possibly last known value, from meta data */

        if (bdev) {
                local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
                device->local_max_bio_size = local;
        }
        local = min(local, DRBD_MAX_BIO_SIZE);

        /* We may ignore peer limits if the peer is modern enough,
           because from 8.3.8 onwards the peer can use multiple
           BIOs for a single peer_request. */
        if (device->state.conn >= C_WF_REPORT_PARAMS) {
                if (first_peer_device(device)->connection->agreed_pro_version < 94)
                        peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
                        /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
                else if (first_peer_device(device)->connection->agreed_pro_version == 94)
                        peer = DRBD_MAX_SIZE_H80_PACKET;
                else if (first_peer_device(device)->connection->agreed_pro_version < 100)
                        peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
                else
                        peer = DRBD_MAX_BIO_SIZE;

                /* We may later detach and re-attach on a disconnected Primary.
                 * Avoid letting this setting jump back in that case.
                 * We want to store what we know the peer DRBD can handle,
                 * not what the peer IO backend can handle. */
                if (peer > device->peer_max_bio_size)
                        device->peer_max_bio_size = peer;
        }
        new = min(local, peer);

        if (device->state.role == R_PRIMARY && new < now)
                drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);

        if (new != now)
                drbd_info(device, "max BIO size = %u\n", new);

        drbd_setup_queue_param(device, bdev, new);
}
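
/*
 * The peer limit chosen above collapses to this table (values straight
 * from the code; agreed_pro_version is negotiated at connect time):
 *
 *	 < 94   min(peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET)
 *	== 94   DRBD_MAX_SIZE_H80_PACKET
 *	95..99  DRBD_MAX_BIO_SIZE_P95
 *	>= 100  DRBD_MAX_BIO_SIZE
 */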

/* Starts the worker thread */
static void conn_reconfig_start(struct drbd_connection *connection)
{
        drbd_thread_start(&connection->worker);
        drbd_flush_workqueue(&connection->sender_work);
}

/* if still unconfigured, stops worker again. */
static void conn_reconfig_done(struct drbd_connection *connection)
{
        bool stop_threads;
        spin_lock_irq(&connection->resource->req_lock);
        stop_threads = conn_all_vols_unconf(connection) &&
                connection->cstate == C_STANDALONE;
        spin_unlock_irq(&connection->resource->req_lock);
        if (stop_threads) {
                /* asender is implicitly stopped by receiver
                 * in conn_disconnect() */
                drbd_thread_stop(&connection->receiver);
                drbd_thread_stop(&connection->worker);
        }
}

/* Make sure IO is suspended before calling this function. */
static void drbd_suspend_al(struct drbd_device *device)
{
        int s = 0;

        if (!lc_try_lock(device->act_log)) {
                drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
                return;
        }

        drbd_al_shrink(device);
        spin_lock_irq(&device->resource->req_lock);
        if (device->state.conn < C_CONNECTED)
                s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
        spin_unlock_irq(&device->resource->req_lock);
        lc_unlock(device->act_log);

        if (s)
                drbd_info(device, "Suspended AL updates\n");
}


static bool should_set_defaults(struct genl_info *info)
{
        unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
        return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
}

static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
{
        /* This is limited by 16 bit "slot" numbers,
         * and by available on-disk context storage.
         *
         * Also (u16)~0 is special (denotes a "free" extent).
         *
         * One transaction occupies one 4kB on-disk block,
         * we have n such blocks in the on disk ring buffer,
         * the "current" transaction may fail (n-1),
         * and there are 919 context slot numbers per transaction.
         *
         * 72 transaction blocks amount to more than 2**16 context slots,
         * so cap there first.
         */
        const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
        const unsigned int sufficient_on_disk =
                (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
                /AL_CONTEXT_PER_TRANSACTION;

        unsigned int al_size_4k = bdev->md.al_size_4k;

        if (al_size_4k > sufficient_on_disk)
                return max_al_nr;

        return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
1306
1307static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1308{
1309        return  a->disk_barrier != b->disk_barrier ||
1310                a->disk_flushes != b->disk_flushes ||
1311                a->disk_drain != b->disk_drain;
1312}
1313
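    /* Change disk options of an attached device.  Reached via generic
     * netlink; a typical userland trigger would be something like
     * (hypothetical drbd-8.4-style invocation):
     *      drbdsetup disk-options <minor> --al-extents=1237
     * The new disk_conf is published via RCU under resource->conf_update. */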
1314int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1315{
1316        struct drbd_config_context adm_ctx;
1317        enum drbd_ret_code retcode;
1318        struct drbd_device *device;
1319        struct disk_conf *new_disk_conf, *old_disk_conf;
1320        struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1321        int err, fifo_size;
1322
1323        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1324        if (!adm_ctx.reply_skb)
1325                return retcode;
1326        if (retcode != NO_ERROR)
1327                goto finish;
1328
1329        device = adm_ctx.device;
1330        mutex_lock(&adm_ctx.resource->adm_mutex);
1331
1332        /* We also need a disk
1333         * to change the options on. */
1334        if (!get_ldev(device)) {
1335                retcode = ERR_NO_DISK;
1336                goto out;
1337        }
1338
1339        new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1340        if (!new_disk_conf) {
1341                retcode = ERR_NOMEM;
1342                goto fail;
1343        }
1344
1345        mutex_lock(&device->resource->conf_update);
1346        old_disk_conf = device->ldev->disk_conf;
1347        *new_disk_conf = *old_disk_conf;
1348        if (should_set_defaults(info))
1349                set_disk_conf_defaults(new_disk_conf);
1350
1351        err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1352        if (err && err != -ENOMSG) {
1353                retcode = ERR_MANDATORY_TAG;
1354                drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1355                goto fail_unlock;
1356        }
1357
1358        if (!expect(new_disk_conf->resync_rate >= 1))
1359                new_disk_conf->resync_rate = 1;
1360
1361        if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1362                new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1363        if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1364                new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1365
1366        if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1367                new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1368
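            /* The dynamic resync controller keeps one fifo slot per planning
             * step.  c_plan_ahead is configured in 0.1s units; assuming
             * SLEEP_TIME is HZ/10 (the 100ms resync timer interval), this
             * works out to exactly c_plan_ahead slots. */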
1369        fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1370        if (fifo_size != device->rs_plan_s->size) {
1371                new_plan = fifo_alloc(fifo_size);
1372                if (!new_plan) {
1373                        drbd_err(device, "kmalloc of fifo_buffer failed\n");
1374                        retcode = ERR_NOMEM;
1375                        goto fail_unlock;
1376                }
1377        }
1378
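            /* Changing al_extents requires resizing the activity log LRU.
             * That must not race with drbd_al_begin_io(), so suspend
             * application IO and hold the AL lock for the duration of
             * drbd_check_al_size(). */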
1379        drbd_suspend_io(device);
1380        wait_event(device->al_wait, lc_try_lock(device->act_log));
1381        drbd_al_shrink(device);
1382        err = drbd_check_al_size(device, new_disk_conf);
1383        lc_unlock(device->act_log);
1384        wake_up(&device->al_wait);
1385        drbd_resume_io(device);
1386
1387        if (err) {
1388                retcode = ERR_NOMEM;
1389                goto fail_unlock;
1390        }
1391
1392        write_lock_irq(&global_state_lock);
1393        retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1394        if (retcode == NO_ERROR) {
1395                rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1396                drbd_resync_after_changed(device);
1397        }
1398        write_unlock_irq(&global_state_lock);
1399
1400        if (retcode != NO_ERROR)
1401                goto fail_unlock;
1402
1403        if (new_plan) {
1404                old_plan = device->rs_plan_s;
1405                rcu_assign_pointer(device->rs_plan_s, new_plan);
1406        }
1407
1408        mutex_unlock(&device->resource->conf_update);
1409
1410        if (new_disk_conf->al_updates)
1411                device->ldev->md.flags &= ~MDF_AL_DISABLED;
1412        else
1413                device->ldev->md.flags |= MDF_AL_DISABLED;
1414
1415        if (new_disk_conf->md_flushes)
1416                clear_bit(MD_NO_FUA, &device->flags);
1417        else
1418                set_bit(MD_NO_FUA, &device->flags);
1419
1420        if (write_ordering_changed(old_disk_conf, new_disk_conf))
1421                drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
1422
1423        drbd_md_sync(device);
1424
1425        if (device->state.conn >= C_CONNECTED) {
1426                struct drbd_peer_device *peer_device;
1427
1428                for_each_peer_device(peer_device, device)
1429                        drbd_send_sync_param(peer_device);
1430        }
1431
1432        synchronize_rcu();
1433        kfree(old_disk_conf);
1434        kfree(old_plan);
1435        mod_timer(&device->request_timer, jiffies + HZ);
1436        goto success;
1437
1438fail_unlock:
1439        mutex_unlock(&device->resource->conf_update);
1440 fail:
1441        kfree(new_disk_conf);
1442        kfree(new_plan);
1443success:
1444        put_ldev(device);
1445 out:
1446        mutex_unlock(&adm_ctx.resource->adm_mutex);
1447 finish:
1448        drbd_adm_finish(&adm_ctx, info, retcode);
1449        return 0;
1450}
1451
1452int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1453{
1454        struct drbd_config_context adm_ctx;
1455        struct drbd_device *device;
1456        struct drbd_peer_device *peer_device;
1457        struct drbd_connection *connection;
1458        int err;
1459        enum drbd_ret_code retcode;
1460        enum determine_dev_size dd;
1461        sector_t max_possible_sectors;
1462        sector_t min_md_device_sectors;
1463        struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1464        struct disk_conf *new_disk_conf = NULL;
1465        struct block_device *bdev;
1466        struct lru_cache *resync_lru = NULL;
1467        struct fifo_buffer *new_plan = NULL;
1468        union drbd_state ns, os;
1469        enum drbd_state_rv rv;
1470        struct net_conf *nc;
1471
1472        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1473        if (!adm_ctx.reply_skb)
1474                return retcode;
1475        if (retcode != NO_ERROR)
1476                goto finish;
1477
1478        device = adm_ctx.device;
1479        mutex_lock(&adm_ctx.resource->adm_mutex);
1480        peer_device = first_peer_device(device);
1481        connection = peer_device ? peer_device->connection : NULL;
1482        conn_reconfig_start(connection);
1483
1484        /* if you want to reconfigure, please tear down first */
1485        if (device->state.disk > D_DISKLESS) {
1486                retcode = ERR_DISK_CONFIGURED;
1487                goto fail;
1488        }
1489        /* It may just now have detached because of IO error.  Make sure
1490         * drbd_ldev_destroy is done already, we may end up here very fast,
1491         * e.g. if someone calls attach from the on-io-error handler,
1492         * to realize a "hot spare" feature (not that I'd recommend that) */
1493        wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
1494
1495        /* make sure there is no leftover from previous force-detach attempts */
1496        clear_bit(FORCE_DETACH, &device->flags);
1497        clear_bit(WAS_IO_ERROR, &device->flags);
1498        clear_bit(WAS_READ_ERROR, &device->flags);
1499
1500        /* and no leftover from previously aborted resync or verify, either */
1501        device->rs_total = 0;
1502        device->rs_failed = 0;
1503        atomic_set(&device->rs_pending_cnt, 0);
1504
1505        /* allocation not in the IO path, drbdsetup context */
1506        nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1507        if (!nbc) {
1508                retcode = ERR_NOMEM;
1509                goto fail;
1510        }
1511        spin_lock_init(&nbc->md.uuid_lock);
1512
1513        new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1514        if (!new_disk_conf) {
1515                retcode = ERR_NOMEM;
1516                goto fail;
1517        }
1518        nbc->disk_conf = new_disk_conf;
1519
1520        set_disk_conf_defaults(new_disk_conf);
1521        err = disk_conf_from_attrs(new_disk_conf, info);
1522        if (err) {
1523                retcode = ERR_MANDATORY_TAG;
1524                drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1525                goto fail;
1526        }
1527
1528        if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1529                new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1530
1531        new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1532        if (!new_plan) {
1533                retcode = ERR_NOMEM;
1534                goto fail;
1535        }
1536
1537        if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1538                retcode = ERR_MD_IDX_INVALID;
1539                goto fail;
1540        }
1541
1542        write_lock_irq(&global_state_lock);
1543        retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1544        write_unlock_irq(&global_state_lock);
1545        if (retcode != NO_ERROR)
1546                goto fail;
1547
1548        rcu_read_lock();
1549        nc = rcu_dereference(connection->net_conf);
1550        if (nc) {
1551                if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1552                        rcu_read_unlock();
1553                        retcode = ERR_STONITH_AND_PROT_A;
1554                        goto fail;
1555                }
1556        }
1557        rcu_read_unlock();
1558
1559        bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1560                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1561        if (IS_ERR(bdev)) {
1562                drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1563                        PTR_ERR(bdev));
1564                retcode = ERR_OPEN_DISK;
1565                goto fail;
1566        }
1567        nbc->backing_bdev = bdev;
1568
1569        /*
1570         * meta_dev_idx >= 0: external fixed size, possibly multiple
1571         * drbd sharing one meta device.  TODO in that case, paranoia
1572         * check that [md_bdev, meta_dev_idx] is not yet used by some
1573         * other drbd minor!  (if you use drbd.conf + drbdadm, that
1574         * should check it for you already; but if you don't, or
1575         * someone fooled it, we need to double check here)
1576         */
1577        bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1578                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1579                                  (new_disk_conf->meta_dev_idx < 0) ?
1580                                  (void *)device : (void *)drbd_m_holder);
1581        if (IS_ERR(bdev)) {
1582                drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1583                        PTR_ERR(bdev));
1584                retcode = ERR_OPEN_MD_DISK;
1585                goto fail;
1586        }
1587        nbc->md_bdev = bdev;
1588
1589        if ((nbc->backing_bdev == nbc->md_bdev) !=
1590            (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1591             new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1592                retcode = ERR_MD_IDX_INVALID;
1593                goto fail;
1594        }
1595
1596        resync_lru = lc_create("resync", drbd_bm_ext_cache,
1597                        1, 61, sizeof(struct bm_extent),
1598                        offsetof(struct bm_extent, lce));
1599        if (!resync_lru) {
1600                retcode = ERR_NOMEM;
1601                goto fail;
1602        }
1603
1604        /* Read our meta data super block early.
1605         * This also sets other on-disk offsets. */
1606        retcode = drbd_md_read(device, nbc);
1607        if (retcode != NO_ERROR)
1608                goto fail;
1609
1610        if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1611                new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1612        if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1613                new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1614
1615        if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1616                drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1617                        (unsigned long long) drbd_get_max_capacity(nbc),
1618                        (unsigned long long) new_disk_conf->disk_size);
1619                retcode = ERR_DISK_TOO_SMALL;
1620                goto fail;
1621        }
1622
1623        if (new_disk_conf->meta_dev_idx < 0) {
1624                max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1625                /* at least one MB, otherwise it does not make sense */
1626                min_md_device_sectors = (2<<10);
1627        } else {
1628                max_possible_sectors = DRBD_MAX_SECTORS;
1629                min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1630        }
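            /* Units above are 512-byte sectors: 2<<10 == 2048 sectors == 1 MiB
             * for flexible/internal meta data; external fixed-size meta data
             * occupies 128 MiB per index slot, hence MD_128MB_SECT * (idx + 1). */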
1631
1632        if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1633                retcode = ERR_MD_DISK_TOO_SMALL;
1634                drbd_warn(device, "refusing attach: md-device too small, "
1635                     "at least %llu sectors needed for this meta-disk type\n",
1636                     (unsigned long long) min_md_device_sectors);
1637                goto fail;
1638        }
1639
1640        /* Make sure the new disk is big enough
1641         * (we may currently be R_PRIMARY with no local disk...) */
1642        if (drbd_get_max_capacity(nbc) <
1643            drbd_get_capacity(device->this_bdev)) {
1644                retcode = ERR_DISK_TOO_SMALL;
1645                goto fail;
1646        }
1647
1648        nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1649
1650        if (nbc->known_size > max_possible_sectors) {
1651                drbd_warn(device, "==> truncating very big lower level device "
1652                        "to currently maximum possible %llu sectors <==\n",
1653                        (unsigned long long) max_possible_sectors);
1654                if (new_disk_conf->meta_dev_idx >= 0)
1655                        drbd_warn(device, "==>> using internal or flexible "
1656                                      "meta data may help <<==\n");
1657        }
1658
1659        drbd_suspend_io(device);
1660        /* also wait for the last barrier ack. */
1661        /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1662         * We need a way to either ignore barrier acks for barriers sent before a device
1663         * was attached, or a way to wait for all pending barrier acks to come in.
1664         * As barriers are counted per resource,
1665         * we'd need to suspend io on all devices of a resource.
1666         */
1667        wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1668        /* and for any other previously queued work */
1669        drbd_flush_workqueue(&connection->sender_work);
1670
1671        rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1672        retcode = rv;  /* FIXME: Type mismatch. */
1673        drbd_resume_io(device);
1674        if (rv < SS_SUCCESS)
1675                goto fail;
1676
1677        if (!get_ldev_if_state(device, D_ATTACHING))
1678                goto force_diskless;
1679
1680        if (!device->bitmap) {
1681                if (drbd_bm_init(device)) {
1682                        retcode = ERR_NOMEM;
1683                        goto force_diskless_dec;
1684                }
1685        }
1686
1687        if (device->state.conn < C_CONNECTED &&
1688            device->state.role == R_PRIMARY && device->ed_uuid &&
1689            (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1690                drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1691                    (unsigned long long)device->ed_uuid);
1692                retcode = ERR_DATA_NOT_CURRENT;
1693                goto force_diskless_dec;
1694        }
1695
1696        /* Since we are diskless, fix the activity log first... */
1697        if (drbd_check_al_size(device, new_disk_conf)) {
1698                retcode = ERR_NOMEM;
1699                goto force_diskless_dec;
1700        }
1701
1702        /* Prevent shrinking of consistent devices ! */
1703        if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1704            drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1705                drbd_warn(device, "refusing to truncate a consistent device\n");
1706                retcode = ERR_DISK_TOO_SMALL;
1707                goto force_diskless_dec;
1708        }
1709
1710        /* Reset the "barriers don't work" bits here, then force meta data to
1711         * be written, to ensure we determine if barriers are supported. */
1712        if (new_disk_conf->md_flushes)
1713                clear_bit(MD_NO_FUA, &device->flags);
1714        else
1715                set_bit(MD_NO_FUA, &device->flags);
1716
1717        /* Point of no return reached.
1718         * Devices and memory are no longer released by error cleanup below.
1719         * From now on the device takes over responsibility, and the state
1720         * engine should clean it up somewhere.  */
1721        D_ASSERT(device, device->ldev == NULL);
1722        device->ldev = nbc;
1723        device->resync = resync_lru;
1724        device->rs_plan_s = new_plan;
1725        nbc = NULL;
1726        resync_lru = NULL;
1727        new_disk_conf = NULL;
1728        new_plan = NULL;
1729
1730        drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
1731
1732        if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1733                set_bit(CRASHED_PRIMARY, &device->flags);
1734        else
1735                clear_bit(CRASHED_PRIMARY, &device->flags);
1736
1737        if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1738            !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1739                set_bit(CRASHED_PRIMARY, &device->flags);
1740
1741        device->send_cnt = 0;
1742        device->recv_cnt = 0;
1743        device->read_cnt = 0;
1744        device->writ_cnt = 0;
1745
1746        drbd_reconsider_max_bio_size(device, device->ldev);
1747
1748        /* If I am currently not R_PRIMARY,
1749         * but meta data primary indicator is set,
1750         * I just now recover from a hard crash,
1751         * and have been R_PRIMARY before that crash.
1752         *
1753         * Now, if I had no connection before that crash
1754         * (have been degraded R_PRIMARY), chances are that
1755         * I won't find my peer now either.
1756         *
1757         * In that case, and _only_ in that case,
1758         * we use the degr-wfc-timeout instead of the default,
1759         * so we can automatically recover from a crash of a
1760         * degraded but active "cluster" after a certain timeout.
1761         */
1762        clear_bit(USE_DEGR_WFC_T, &device->flags);
1763        if (device->state.role != R_PRIMARY &&
1764             drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1765            !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1766                set_bit(USE_DEGR_WFC_T, &device->flags);
1767
1768        dd = drbd_determine_dev_size(device, 0, NULL);
1769        if (dd <= DS_ERROR) {
1770                retcode = ERR_NOMEM_BITMAP;
1771                goto force_diskless_dec;
1772        } else if (dd == DS_GREW)
1773                set_bit(RESYNC_AFTER_NEG, &device->flags);
1774
1775        if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1776            (test_bit(CRASHED_PRIMARY, &device->flags) &&
1777             drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1778                drbd_info(device, "Assuming that all blocks are out of sync "
1779                     "(aka FullSync)\n");
1780                if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1781                        "set_n_write from attaching", BM_LOCKED_MASK)) {
1782                        retcode = ERR_IO_MD_DISK;
1783                        goto force_diskless_dec;
1784                }
1785        } else {
1786                if (drbd_bitmap_io(device, &drbd_bm_read,
1787                        "read from attaching", BM_LOCKED_MASK)) {
1788                        retcode = ERR_IO_MD_DISK;
1789                        goto force_diskless_dec;
1790                }
1791        }
1792
1793        if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1794                drbd_suspend_al(device); /* IO is still suspended here... */
1795
1796        spin_lock_irq(&device->resource->req_lock);
1797        os = drbd_read_state(device);
1798        ns = os;
1799        /* If MDF_CONSISTENT is not set, go into D_INCONSISTENT state;
1800           otherwise investigate MDF_WAS_UP_TO_DATE:
1801           if MDF_WAS_UP_TO_DATE is not set, go into D_OUTDATED disk state,
1802           otherwise into D_CONSISTENT state.
1803        */
1804        if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1805                if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1806                        ns.disk = D_CONSISTENT;
1807                else
1808                        ns.disk = D_OUTDATED;
1809        } else {
1810                ns.disk = D_INCONSISTENT;
1811        }
1812
1813        if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1814                ns.pdsk = D_OUTDATED;
1815
1816        rcu_read_lock();
1817        if (ns.disk == D_CONSISTENT &&
1818            (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1819                ns.disk = D_UP_TO_DATE;
1820
1821        /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1822           MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1823           this point, because drbd_request_state() modifies these
1824           flags. */
1825
1826        if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1827                device->ldev->md.flags &= ~MDF_AL_DISABLED;
1828        else
1829                device->ldev->md.flags |= MDF_AL_DISABLED;
1830
1831        rcu_read_unlock();
1832
1833        /* In case we are C_CONNECTED, postpone any decision on the new disk
1834           state until after the negotiation phase. */
1835        if (device->state.conn == C_CONNECTED) {
1836                device->new_state_tmp.i = ns.i;
1837                ns.i = os.i;
1838                ns.disk = D_NEGOTIATING;
1839
1840                /* We expect to receive up-to-date UUIDs soon.
1841                   To avoid a race in receive_state, free p_uuid while
1842                   holding req_lock, i.e. atomically with the state change. */
1843                kfree(device->p_uuid);
1844                device->p_uuid = NULL;
1845        }
1846
1847        rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1848        spin_unlock_irq(&device->resource->req_lock);
1849
1850        if (rv < SS_SUCCESS)
1851                goto force_diskless_dec;
1852
1853        mod_timer(&device->request_timer, jiffies + HZ);
1854
1855        if (device->state.role == R_PRIMARY)
1856                device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1857        else
1858                device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1859
1860        drbd_md_mark_dirty(device);
1861        drbd_md_sync(device);
1862
1863        kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1864        put_ldev(device);
1865        conn_reconfig_done(connection);
1866        mutex_unlock(&adm_ctx.resource->adm_mutex);
1867        drbd_adm_finish(&adm_ctx, info, retcode);
1868        return 0;
1869
1870 force_diskless_dec:
1871        put_ldev(device);
1872 force_diskless:
1873        drbd_force_state(device, NS(disk, D_DISKLESS));
1874        drbd_md_sync(device);
1875 fail:
1876        conn_reconfig_done(connection);
1877        if (nbc) {
1878                if (nbc->backing_bdev)
1879                        blkdev_put(nbc->backing_bdev,
1880                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1881                if (nbc->md_bdev)
1882                        blkdev_put(nbc->md_bdev,
1883                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1884                kfree(nbc);
1885        }
1886        kfree(new_disk_conf);
1887        lc_destroy(resync_lru);
1888        kfree(new_plan);
1889        mutex_unlock(&adm_ctx.resource->adm_mutex);
1890 finish:
1891        drbd_adm_finish(&adm_ctx, info, retcode);
1892        return 0;
1893}
1894
1895static int adm_detach(struct drbd_device *device, int force)
1896{
1897        enum drbd_state_rv retcode;
1898        int ret;
1899
1900        if (force) {
1901                set_bit(FORCE_DETACH, &device->flags);
1902                drbd_force_state(device, NS(disk, D_FAILED));
1903                retcode = SS_SUCCESS;
1904                goto out;
1905        }
1906
1907        drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1908        drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
1909        retcode = drbd_request_state(device, NS(disk, D_FAILED));
1910        drbd_md_put_buffer(device);
1911        /* D_FAILED will transition to DISKLESS. */
1912        ret = wait_event_interruptible(device->misc_wait,
1913                        device->state.disk != D_FAILED);
1914        drbd_resume_io(device);
1915        if ((int)retcode == (int)SS_IS_DISKLESS)
1916                retcode = SS_NOTHING_TO_DO;
1917        if (ret)
1918                retcode = ERR_INTR;
1919out:
1920        return retcode;
1921}
1922
1923/* Detaching the disk is a process in multiple stages.  First we need to lock
1924 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1925 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1926 * internal references as well.
1927 * Only then have we finally detached. */
1928int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1929{
1930        struct drbd_config_context adm_ctx;
1931        enum drbd_ret_code retcode;
1932        struct detach_parms parms = { };
1933        int err;
1934
1935        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1936        if (!adm_ctx.reply_skb)
1937                return retcode;
1938        if (retcode != NO_ERROR)
1939                goto out;
1940
1941        if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1942                err = detach_parms_from_attrs(&parms, info);
1943                if (err) {
1944                        retcode = ERR_MANDATORY_TAG;
1945                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1946                        goto out;
1947                }
1948        }
1949
1950        mutex_lock(&adm_ctx.resource->adm_mutex);
1951        retcode = adm_detach(adm_ctx.device, parms.force_detach);
1952        mutex_unlock(&adm_ctx.resource->adm_mutex);
1953out:
1954        drbd_adm_finish(&adm_ctx, info, retcode);
1955        return 0;
1956}
1957
1958static bool conn_resync_running(struct drbd_connection *connection)
1959{
1960        struct drbd_peer_device *peer_device;
1961        bool rv = false;
1962        int vnr;
1963
1964        rcu_read_lock();
1965        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1966                struct drbd_device *device = peer_device->device;
1967                if (device->state.conn == C_SYNC_SOURCE ||
1968                    device->state.conn == C_SYNC_TARGET ||
1969                    device->state.conn == C_PAUSED_SYNC_S ||
1970                    device->state.conn == C_PAUSED_SYNC_T) {
1971                        rv = true;
1972                        break;
1973                }
1974        }
1975        rcu_read_unlock();
1976
1977        return rv;
1978}
1979
1980static bool conn_ov_running(struct drbd_connection *connection)
1981{
1982        struct drbd_peer_device *peer_device;
1983        bool rv = false;
1984        int vnr;
1985
1986        rcu_read_lock();
1987        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1988                struct drbd_device *device = peer_device->device;
1989                if (device->state.conn == C_VERIFY_S ||
1990                    device->state.conn == C_VERIFY_T) {
1991                        rv = true;
1992                        break;
1993                }
1994        }
1995        rcu_read_unlock();
1996
1997        return rv;
1998}
1999
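    /* Validate a new net_conf against the current connection state.  Enforced
     * below: protocol, two-primaries and integrity-alg may not change below
     * agreed protocol version 100 while in C_WF_REPORT_PARAMS; dual-primary
     * requires protocol C; congestion policies other than "block" require
     * protocol A; STONITH fencing is incompatible with protocol A; and a
     * current Primary may neither drop allow-two-primaries nor discard its
     * own data. */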
2000static enum drbd_ret_code
2001_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
2002{
2003        struct drbd_peer_device *peer_device;
2004        int i;
2005
2006        if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
2007                if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2008                        return ERR_NEED_APV_100;
2009
2010                if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2011                        return ERR_NEED_APV_100;
2012
2013                if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2014                        return ERR_NEED_APV_100;
2015        }
2016
2017        if (!new_net_conf->two_primaries &&
2018            conn_highest_role(connection) == R_PRIMARY &&
2019            conn_highest_peer(connection) == R_PRIMARY)
2020                return ERR_NEED_ALLOW_TWO_PRI;
2021
2022        if (new_net_conf->two_primaries &&
2023            (new_net_conf->wire_protocol != DRBD_PROT_C))
2024                return ERR_NOT_PROTO_C;
2025
2026        idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2027                struct drbd_device *device = peer_device->device;
2028                if (get_ldev(device)) {
2029                        enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2030                        put_ldev(device);
2031                        if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2032                                return ERR_STONITH_AND_PROT_A;
2033                }
2034                if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2035                        return ERR_DISCARD_IMPOSSIBLE;
2036        }
2037
2038        if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2039                return ERR_CONG_NOT_PROTO_A;
2040
2041        return NO_ERROR;
2042}
2043
2044static enum drbd_ret_code
2045check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2046{
2047        enum drbd_ret_code rv;
2048        struct drbd_peer_device *peer_device;
2049        int i;
2050
2051        rcu_read_lock();
2052        rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2053        rcu_read_unlock();
2054
2055        /* connection->volumes protected by genl_lock() here */
2056        idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2057                struct drbd_device *device = peer_device->device;
2058                if (!device->bitmap) {
2059                        if (drbd_bm_init(device))
2060                                return ERR_NOMEM;
2061                }
2062        }
2063
2064        return rv;
2065}
2066
2067struct crypto {
2068        struct crypto_hash *verify_tfm;
2069        struct crypto_hash *csums_tfm;
2070        struct crypto_hash *cram_hmac_tfm;
2071        struct crypto_hash *integrity_tfm;
2072};
2073
2074static int
2075alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2076{
2077        if (!tfm_name[0])
2078                return NO_ERROR;
2079
2080        *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2081        if (IS_ERR(*tfm)) {
2082                *tfm = NULL;
2083                return err_alg;
2084        }
2085
2086        return NO_ERROR;
2087}
2088
2089static enum drbd_ret_code
2090alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2091{
2092        char hmac_name[CRYPTO_MAX_ALG_NAME];
2093        enum drbd_ret_code rv;
2094
2095        rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2096                       ERR_CSUMS_ALG);
2097        if (rv != NO_ERROR)
2098                return rv;
2099        rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2100                       ERR_VERIFY_ALG);
2101        if (rv != NO_ERROR)
2102                return rv;
2103        rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2104                       ERR_INTEGRITY_ALG);
2105        if (rv != NO_ERROR)
2106                return rv;
2107        if (new_net_conf->cram_hmac_alg[0] != 0) {
2108                snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2109                         new_net_conf->cram_hmac_alg);
2110
2111                rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2112                               ERR_AUTH_ALG);
2113        }
2114
2115        return rv;
2116}
2117
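    /* Safe on a partially initialized struct crypto: alloc_hash() leaves a
     * tfm NULL both when its config string is empty and when allocation
     * fails, and crypto_free_hash() on a NULL tfm boils down to
     * crypto_free_tfm(), which (assumption) tolerates NULL. */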
2118static void free_crypto(struct crypto *crypto)
2119{
2120        crypto_free_hash(crypto->cram_hmac_tfm);
2121        crypto_free_hash(crypto->integrity_tfm);
2122        crypto_free_hash(crypto->csums_tfm);
2123        crypto_free_hash(crypto->verify_tfm);
2124}
2125
2126int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2127{
2128        struct drbd_config_context adm_ctx;
2129        enum drbd_ret_code retcode;
2130        struct drbd_connection *connection;
2131        struct net_conf *old_net_conf, *new_net_conf = NULL;
2132        int err;
2133        int ovr; /* online verify running */
2134        int rsr; /* re-sync running */
2135        struct crypto crypto = { };
2136
2137        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2138        if (!adm_ctx.reply_skb)
2139                return retcode;
2140        if (retcode != NO_ERROR)
2141                goto finish;
2142
2143        connection = adm_ctx.connection;
2144        mutex_lock(&adm_ctx.resource->adm_mutex);
2145
2146        new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2147        if (!new_net_conf) {
2148                retcode = ERR_NOMEM;
2149                goto out;
2150        }
2151
2152        conn_reconfig_start(connection);
2153
2154        mutex_lock(&connection->data.mutex);
2155        mutex_lock(&connection->resource->conf_update);
2156        old_net_conf = connection->net_conf;
2157
2158        if (!old_net_conf) {
2159                drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2160                retcode = ERR_INVALID_REQUEST;
2161                goto fail;
2162        }
2163
2164        *new_net_conf = *old_net_conf;
2165        if (should_set_defaults(info))
2166                set_net_conf_defaults(new_net_conf);
2167
2168        err = net_conf_from_attrs_for_change(new_net_conf, info);
2169        if (err && err != -ENOMSG) {
2170                retcode = ERR_MANDATORY_TAG;
2171                drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2172                goto fail;
2173        }
2174
2175        retcode = check_net_options(connection, new_net_conf);
2176        if (retcode != NO_ERROR)
2177                goto fail;
2178
2179        /* re-sync running */
2180        rsr = conn_resync_running(connection);
2181        if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2182                retcode = ERR_CSUMS_RESYNC_RUNNING;
2183                goto fail;
2184        }
2185
2186        /* online verify running */
2187        ovr = conn_ov_running(connection);
2188        if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2189                retcode = ERR_VERIFY_RUNNING;
2190                goto fail;
2191        }
2192
2193        retcode = alloc_crypto(&crypto, new_net_conf);
2194        if (retcode != NO_ERROR)
2195                goto fail;
2196
2197        rcu_assign_pointer(connection->net_conf, new_net_conf);
2198
2199        if (!rsr) {
2200                crypto_free_hash(connection->csums_tfm);
2201                connection->csums_tfm = crypto.csums_tfm;
2202                crypto.csums_tfm = NULL;
2203        }
2204        if (!ovr) {
2205                crypto_free_hash(connection->verify_tfm);
2206                connection->verify_tfm = crypto.verify_tfm;
2207                crypto.verify_tfm = NULL;
2208        }
2209
2210        crypto_free_hash(connection->integrity_tfm);
2211        connection->integrity_tfm = crypto.integrity_tfm;
2212        if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2213                /* Do this without trying to take connection->data.mutex again.  */
2214                __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2215
2216        crypto_free_hash(connection->cram_hmac_tfm);
2217        connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2218
2219        mutex_unlock(&connection->resource->conf_update);
2220        mutex_unlock(&connection->data.mutex);
2221        synchronize_rcu();
2222        kfree(old_net_conf);
2223
2224        if (connection->cstate >= C_WF_REPORT_PARAMS) {
2225                struct drbd_peer_device *peer_device;
2226                int vnr;
2227
2228                idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2229                        drbd_send_sync_param(peer_device);
2230        }
2231
2232        goto done;
2233
2234 fail:
2235        mutex_unlock(&connection->resource->conf_update);
2236        mutex_unlock(&connection->data.mutex);
2237        free_crypto(&crypto);
2238        kfree(new_net_conf);
2239 done:
2240        conn_reconfig_done(connection);
2241 out:
2242        mutex_unlock(&adm_ctx.resource->adm_mutex);
2243 finish:
2244        drbd_adm_finish(&adm_ctx, info, retcode);
2245        return 0;
2246}
2247
2248int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2249{
2250        struct drbd_config_context adm_ctx;
2251        struct drbd_peer_device *peer_device;
2252        struct net_conf *old_net_conf, *new_net_conf = NULL;
2253        struct crypto crypto = { };
2254        struct drbd_resource *resource;
2255        struct drbd_connection *connection;
2256        enum drbd_ret_code retcode;
2257        int i;
2258        int err;
2259
2260        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2261
2262        if (!adm_ctx.reply_skb)
2263                return retcode;
2264        if (retcode != NO_ERROR)
2265                goto out;
2266        if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2267                drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2268                retcode = ERR_INVALID_REQUEST;
2269                goto out;
2270        }
2271
2272        /* No need for _rcu here. All reconfiguration is
2273         * strictly serialized on genl_lock(). We are protected against
2274         * concurrent reconfiguration/addition/deletion. */
2275        for_each_resource(resource, &drbd_resources) {
2276                for_each_connection(connection, resource) {
2277                        if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2278                            !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2279                                    connection->my_addr_len)) {
2280                                retcode = ERR_LOCAL_ADDR;
2281                                goto out;
2282                        }
2283
2284                        if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2285                            !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2286                                    connection->peer_addr_len)) {
2287                                retcode = ERR_PEER_ADDR;
2288                                goto out;
2289                        }
2290                }
2291        }
2292
2293        mutex_lock(&adm_ctx.resource->adm_mutex);
2294        connection = first_connection(adm_ctx.resource);
2295        conn_reconfig_start(connection);
2296
2297        if (connection->cstate > C_STANDALONE) {
2298                retcode = ERR_NET_CONFIGURED;
2299                goto fail;
2300        }
2301
2302        /* allocation not in the IO path, drbdsetup / netlink process context */
2303        new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2304        if (!new_net_conf) {
2305                retcode = ERR_NOMEM;
2306                goto fail;
2307        }
2308
2309        set_net_conf_defaults(new_net_conf);
2310
2311        err = net_conf_from_attrs(new_net_conf, info);
2312        if (err && err != -ENOMSG) {
2313                retcode = ERR_MANDATORY_TAG;
2314                drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2315                goto fail;
2316        }
2317
2318        retcode = check_net_options(connection, new_net_conf);
2319        if (retcode != NO_ERROR)
2320                goto fail;
2321
2322        retcode = alloc_crypto(&crypto, new_net_conf);
2323        if (retcode != NO_ERROR)
2324                goto fail;
2325
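            /* Defensively NUL-terminate the user-supplied shared secret so
             * later strlen()/strcmp()-style users cannot run off the end of
             * the buffer. */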
2326        ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2327
2328        drbd_flush_workqueue(&connection->sender_work);
2329
2330        mutex_lock(&adm_ctx.resource->conf_update);
2331        old_net_conf = connection->net_conf;
2332        if (old_net_conf) {
2333                retcode = ERR_NET_CONFIGURED;
2334                mutex_unlock(&adm_ctx.resource->conf_update);
2335                goto fail;
2336        }
2337        rcu_assign_pointer(connection->net_conf, new_net_conf);
2338
2339        conn_free_crypto(connection);
2340        connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2341        connection->integrity_tfm = crypto.integrity_tfm;
2342        connection->csums_tfm = crypto.csums_tfm;
2343        connection->verify_tfm = crypto.verify_tfm;
2344
2345        connection->my_addr_len = nla_len(adm_ctx.my_addr);
2346        memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2347        connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2348        memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2349
2350        mutex_unlock(&adm_ctx.resource->conf_update);
2351
2352        rcu_read_lock();
2353        idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2354                struct drbd_device *device = peer_device->device;
2355                device->send_cnt = 0;
2356                device->recv_cnt = 0;
2357        }
2358        rcu_read_unlock();
2359
2360        retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2361
2362        conn_reconfig_done(connection);
2363        mutex_unlock(&adm_ctx.resource->adm_mutex);
2364        drbd_adm_finish(&adm_ctx, info, retcode);
2365        return 0;
2366
2367fail:
2368        free_crypto(&crypto);
2369        kfree(new_net_conf);
2370
2371        conn_reconfig_done(connection);
2372        mutex_unlock(&adm_ctx.resource->adm_mutex);
2373out:
2374        drbd_adm_finish(&adm_ctx, info, retcode);
2375        return 0;
2376}
2377
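    /* Try a graceful transition to C_DISCONNECTING, then work through the
     * known refusals: SS_PRIMARY_NOP wants the peer marked D_OUTDATED first,
     * while SS_CW_FAILED_BY_PEER means the peer wants to see *us* outdated;
     * failing that, the disconnect is forced (CS_HARD).  On success we also
     * wait for the receiver thread to exit and force C_STANDALONE to break a
     * possible restart race. */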
2378static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2379{
2380        enum drbd_state_rv rv;
2381
2382        rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2383                        force ? CS_HARD : 0);
2384
2385        switch (rv) {
2386        case SS_NOTHING_TO_DO:
2387                break;
2388        case SS_ALREADY_STANDALONE:
2389                return SS_SUCCESS;
2390        case SS_PRIMARY_NOP:
2391                /* Our state checking code wants to see the peer outdated. */
2392                rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2393
2394                if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2395                        rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2396
2397                break;
2398        case SS_CW_FAILED_BY_PEER:
2399                /* The peer probably wants to see us outdated. */
2400                rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2401                                                        disk, D_OUTDATED), 0);
2402                if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2403                        rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2404                                        CS_HARD);
2405                }
2406                break;
2407        default:
2408                break; /* no special handling necessary */
2409        }
2410
2411        if (rv >= SS_SUCCESS) {
2412                enum drbd_state_rv rv2;
2413                /* No one else can reconfigure the network while I am here.
2414                 * The state handling only uses drbd_thread_stop_nowait(),
2415                 * we really want to wait here until the receiver thread is gone.
2416                 */
2417                drbd_thread_stop(&connection->receiver);
2418
2419                /* Race breaker.  This additional state change request may be
2420                 * necessary, if this was a forced disconnect during a receiver
2421                 * restart.  We may have "killed" the receiver thread just
2422                 * after drbd_receiver() returned.  Typically, we should be
2423                 * C_STANDALONE already, now, and this becomes a no-op.
2424                 */
2425                rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2426                                CS_VERBOSE | CS_HARD);
2427                if (rv2 < SS_SUCCESS)
2428                        drbd_err(connection,
2429                                "unexpected rv2=%d in conn_try_disconnect()\n",
2430                                rv2);
2431        }
2432        return rv;
2433}
2434
2435int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2436{
2437        struct drbd_config_context adm_ctx;
2438        struct disconnect_parms parms;
2439        struct drbd_connection *connection;
2440        enum drbd_state_rv rv;
2441        enum drbd_ret_code retcode;
2442        int err;
2443
2444        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2445        if (!adm_ctx.reply_skb)
2446                return retcode;
2447        if (retcode != NO_ERROR)
2448                goto fail;
2449
2450        connection = adm_ctx.connection;
2451        memset(&parms, 0, sizeof(parms));
2452        if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2453                err = disconnect_parms_from_attrs(&parms, info);
2454                if (err) {
2455                        retcode = ERR_MANDATORY_TAG;
2456                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2457                        goto fail;
2458                }
2459        }
2460
2461        mutex_lock(&adm_ctx.resource->adm_mutex);
2462        rv = conn_try_disconnect(connection, parms.force_disconnect);
2463        if (rv < SS_SUCCESS)
2464                retcode = rv;  /* FIXME: Type mismatch. */
2465        else
2466                retcode = NO_ERROR;
2467        mutex_unlock(&adm_ctx.resource->adm_mutex);
2468 fail:
2469        drbd_adm_finish(&adm_ctx, info, retcode);
2470        return 0;
2471}
2472
2473void resync_after_online_grow(struct drbd_device *device)
2474{
2475        int iass; /* I am sync source */
2476
2477        drbd_info(device, "Resync of new storage after online grow\n");
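            /* If the roles differ, the Primary becomes sync source.  With
             * equal roles, fall back to the connection's RESOLVE_CONFLICTS
             * bit, the same arbitrary-but-agreed tie-breaker used to resolve
             * concurrent writes, so both sides reach the same decision. */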
2478        if (device->state.role != device->state.peer)
2479                iass = (device->state.role == R_PRIMARY);
2480        else
2481                iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2482
2483        if (iass)
2484                drbd_start_resync(device, C_SYNC_SOURCE);
2485        else
2486                _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE | CS_SERIALIZE);
2487}
2488
2489int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2490{
2491        struct drbd_config_context adm_ctx;
2492        struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2493        struct resize_parms rs;
2494        struct drbd_device *device;
2495        enum drbd_ret_code retcode;
2496        enum determine_dev_size dd;
2497        bool change_al_layout = false;
2498        enum dds_flags ddsf;
2499        sector_t u_size;
2500        int err;
2501
2502        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2503        if (!adm_ctx.reply_skb)
2504                return retcode;
2505        if (retcode != NO_ERROR)
2506                goto finish;
2507
2508        mutex_lock(&adm_ctx.resource->adm_mutex);
2509        device = adm_ctx.device;
2510        if (!get_ldev(device)) {
2511                retcode = ERR_NO_DISK;
2512                goto fail;
2513        }
2514
2515        memset(&rs, 0, sizeof(struct resize_parms));
2516        rs.al_stripes = device->ldev->md.al_stripes;
2517        rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2518        if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2519                err = resize_parms_from_attrs(&rs, info);
2520                if (err) {
2521                        retcode = ERR_MANDATORY_TAG;
2522                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2523                        goto fail_ldev;
2524                }
2525        }
2526
2527        if (device->state.conn > C_CONNECTED) {
2528                retcode = ERR_RESIZE_RESYNC;
2529                goto fail_ldev;
2530        }
2531
2532        if (device->state.role == R_SECONDARY &&
2533            device->state.peer == R_SECONDARY) {
2534                retcode = ERR_NO_PRIMARY;
2535                goto fail_ldev;
2536        }
2537
2538        if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2539                retcode = ERR_NEED_APV_93;
2540                goto fail_ldev;
2541        }
2542
2543        rcu_read_lock();
2544        u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2545        rcu_read_unlock();
2546        if (u_size != (sector_t)rs.resize_size) {
2547                new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2548                if (!new_disk_conf) {
2549                        retcode = ERR_NOMEM;
2550                        goto fail_ldev;
2551                }
2552        }
2553
2554        if (device->ldev->md.al_stripes != rs.al_stripes ||
2555            device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2556                u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2557
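                    /* al_size_k is in KiB (al_stripe_size is per-stripe KiB).
                     * Upper bound: 16 * 1024 * 1024 KiB == 16 GiB of AL area.
                     * Lower bound: MD_32kB_SECT/2 == 32, i.e. 32 KiB, assuming
                     * MD_32kB_SECT is 32kB in 512-byte sectors (== 64). */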
2558                if (al_size_k > (16 * 1024 * 1024)) {
2559                        retcode = ERR_MD_LAYOUT_TOO_BIG;
2560                        goto fail_ldev;
2561                }
2562
2563                if (al_size_k < MD_32kB_SECT/2) {
2564                        retcode = ERR_MD_LAYOUT_TOO_SMALL;
2565                        goto fail_ldev;
2566                }
2567
2568                if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2569                        retcode = ERR_MD_LAYOUT_CONNECTED;
2570                        goto fail_ldev;
2571                }
2572
2573                change_al_layout = true;
2574        }
2575
2576        if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2577                device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2578
2579        if (new_disk_conf) {
2580                mutex_lock(&device->resource->conf_update);
2581                old_disk_conf = device->ldev->disk_conf;
2582                *new_disk_conf = *old_disk_conf;
2583                new_disk_conf->disk_size = (sector_t)rs.resize_size;
2584                rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2585                mutex_unlock(&device->resource->conf_update);
2586                synchronize_rcu();
2587                kfree(old_disk_conf);
2588        }
2589
2590        ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2591        dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2592        drbd_md_sync(device);
2593        put_ldev(device);
2594        if (dd == DS_ERROR) {
2595                retcode = ERR_NOMEM_BITMAP;
2596                goto fail;
2597        } else if (dd == DS_ERROR_SPACE_MD) {
2598                retcode = ERR_MD_LAYOUT_NO_FIT;
2599                goto fail;
2600        } else if (dd == DS_ERROR_SHRINK) {
2601                retcode = ERR_IMPLICIT_SHRINK;
2602                goto fail;
2603        }
2604
2605        if (device->state.conn == C_CONNECTED) {
2606                if (dd == DS_GREW)
2607                        set_bit(RESIZE_PENDING, &device->flags);
2608
2609                drbd_send_uuids(first_peer_device(device));
2610                drbd_send_sizes(first_peer_device(device), 1, ddsf);
2611        }
2612
2613 fail:
2614        mutex_unlock(&adm_ctx.resource->adm_mutex);
2615 finish:
2616        drbd_adm_finish(&adm_ctx, info, retcode);
2617        return 0;
2618
2619 fail_ldev:
2620        put_ldev(device);
2621        goto fail;
2622}
2623
2624int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2625{
2626        struct drbd_config_context adm_ctx;
2627        enum drbd_ret_code retcode;
2628        struct res_opts res_opts;
2629        int err;
2630
2631        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2632        if (!adm_ctx.reply_skb)
2633                return retcode;
2634        if (retcode != NO_ERROR)
2635                goto fail;
2636
2637        res_opts = adm_ctx.resource->res_opts;
2638        if (should_set_defaults(info))
2639                set_res_opts_defaults(&res_opts);
2640
2641        err = res_opts_from_attrs(&res_opts, info);
2642        if (err && err != -ENOMSG) {
2643                retcode = ERR_MANDATORY_TAG;
2644                drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2645                goto fail;
2646        }
2647
2648        mutex_lock(&adm_ctx.resource->adm_mutex);
2649        err = set_resource_options(adm_ctx.resource, &res_opts);
2650        if (err) {
2651                retcode = ERR_INVALID_REQUEST;
2652                if (err == -ENOMEM)
2653                        retcode = ERR_NOMEM;
2654        }
2655        mutex_unlock(&adm_ctx.resource->adm_mutex);
2656
2657fail:
2658        drbd_adm_finish(&adm_ctx, info, retcode);
2659        return 0;
2660}
2661
2662int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2663{
2664        struct drbd_config_context adm_ctx;
2665        struct drbd_device *device;
2666        int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2667
2668        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2669        if (!adm_ctx.reply_skb)
2670                return retcode;
2671        if (retcode != NO_ERROR)
2672                goto out;
2673
2674        device = adm_ctx.device;
2675        if (!get_ldev(device)) {
2676                retcode = ERR_NO_DISK;
2677                goto out;
2678        }
2679
2680        mutex_lock(&adm_ctx.resource->adm_mutex);
2681
2682        /* If there is still bitmap IO pending, probably because of a previous
2683         * resync just being finished, wait for it before requesting a new resync.
2684         * Also wait for its after_state_ch(). */
2685        drbd_suspend_io(device);
2686        wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2687        drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2688
2689        /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2690         * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2691         * try to start a resync handshake as sync target for full sync.
2692         */
2693        if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2694                retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2695                if (retcode >= SS_SUCCESS) {
2696                        if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2697                                "set_n_write from invalidate", BM_LOCKED_MASK))
2698                                retcode = ERR_IO_MD_DISK;
2699                }
2700        } else
2701                retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2702        drbd_resume_io(device);
2703        mutex_unlock(&adm_ctx.resource->adm_mutex);
2704        put_ldev(device);
2705out:
2706        drbd_adm_finish(&adm_ctx, info, retcode);
2707        return 0;
2708}
2709
2710static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2711                union drbd_state mask, union drbd_state val)
2712{
2713        struct drbd_config_context adm_ctx;
2714        enum drbd_ret_code retcode;
2715
2716        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2717        if (!adm_ctx.reply_skb)
2718                return retcode;
2719        if (retcode != NO_ERROR)
2720                goto out;
2721
2722        mutex_lock(&adm_ctx.resource->adm_mutex);
2723        retcode = drbd_request_state(adm_ctx.device, mask, val);
2724        mutex_unlock(&adm_ctx.resource->adm_mutex);
2725out:
2726        drbd_adm_finish(&adm_ctx, info, retcode);
2727        return 0;
2728}
2729
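/* Bitmap-IO callback: set all bits in the on-disk bitmap and suspend
 * activity-log updates.  Used by drbd_adm_invalidate_peer() below. */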
2730static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
2731{
2732        int rv;
2733
2734        rv = drbd_bmio_set_n_write(device);
2735        drbd_suspend_al(device);
2736        return rv;
2737}
2738
2739int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2740{
2741        struct drbd_config_context adm_ctx;
2742        int retcode; /* enum drbd_ret_code or enum drbd_state_rv */
2743        struct drbd_device *device;
2744
2745        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2746        if (!adm_ctx.reply_skb)
2747                return retcode;
2748        if (retcode != NO_ERROR)
2749                goto out;
2750
2751        device = adm_ctx.device;
2752        if (!get_ldev(device)) {
2753                retcode = ERR_NO_DISK;
2754                goto out;
2755        }
2756
2757        mutex_lock(&adm_ctx.resource->adm_mutex);
2758
2759        /* If there is still bitmap IO pending, probably because of a previous
2760         * resync just being finished, wait for it before requesting a new resync.
2761         * Also wait for its after_state_ch(). */
2762        drbd_suspend_io(device);
2763        wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2764        drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2765
2766        /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2767         * in the bitmap.  Otherwise, try to start a resync handshake
2768         * as sync source for full sync.
2769         */
2770        if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2771                /* The peer will get a resync upon connect anyway. Just make that
2772                   into a full resync. */
2773                retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2774                if (retcode >= SS_SUCCESS) {
2775                        if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2776                                "set_n_write from invalidate_peer",
2777                                BM_LOCKED_SET_ALLOWED))
2778                                retcode = ERR_IO_MD_DISK;
2779                }
2780        } else
2781                retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2782        drbd_resume_io(device);
2783        mutex_unlock(&adm_ctx.resource->adm_mutex);
2784        put_ldev(device);
2785out:
2786        drbd_adm_finish(&adm_ctx, info, retcode);
2787        return 0;
2788}
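/*
 * Note the symmetry with drbd_adm_invalidate() above: "invalidate" marks
 * the local disk as D_INCONSISTENT and makes this node the sync target,
 * while "invalidate-peer" marks the peer's disk (pdsk) as D_INCONSISTENT
 * and makes this node the sync source.
 */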
2789
2790int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2791{
2792        struct drbd_config_context adm_ctx;
2793        enum drbd_ret_code retcode;
2794
2795        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2796        if (!adm_ctx.reply_skb)
2797                return retcode;
2798        if (retcode != NO_ERROR)
2799                goto out;
2800
2801        mutex_lock(&adm_ctx.resource->adm_mutex);
2802        if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2803                retcode = ERR_PAUSE_IS_SET;
2804        mutex_unlock(&adm_ctx.resource->adm_mutex);
2805out:
2806        drbd_adm_finish(&adm_ctx, info, retcode);
2807        return 0;
2808}
2809
2810int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2811{
2812        struct drbd_config_context adm_ctx;
2813        union drbd_dev_state s;
2814        enum drbd_ret_code retcode;
2815
2816        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2817        if (!adm_ctx.reply_skb)
2818                return retcode;
2819        if (retcode != NO_ERROR)
2820                goto out;
2821
2822        mutex_lock(&adm_ctx.resource->adm_mutex);
2823        if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2824                s = adm_ctx.device->state;
2825                if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2826                        retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2827                                  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2828                } else {
2829                        retcode = ERR_PAUSE_IS_CLEAR;
2830                }
2831        }
2832        mutex_unlock(&adm_ctx.resource->adm_mutex);
2833out:
2834        drbd_adm_finish(&adm_ctx, info, retcode);
2835        return 0;
2836}
2837
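/*
 * The NS()/NS3() macros (cf. drbd_int.h) expand to a (mask, val) pair of
 * union drbd_state arguments, so the one-line wrappers below read as
 * "change only the named state field, leave everything else alone".
 * Rough idea for NS(susp, 1) (illustrative sketch, not the exact macro
 * text):
 *
 *   union drbd_state mask = { .susp = 1 };   // field selected for change
 *   union drbd_state val  = { .susp = 1 };   // its new value
 *   drbd_request_state(device, mask, val);
 */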
2838int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2839{
2840        return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2841}
2842
2843int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2844{
2845        struct drbd_config_context adm_ctx;
2846        struct drbd_device *device;
2847        int retcode; /* enum drbd_ret_code or enum drbd_state_rv */
2848
2849        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2850        if (!adm_ctx.reply_skb)
2851                return retcode;
2852        if (retcode != NO_ERROR)
2853                goto out;
2854
2855        mutex_lock(&adm_ctx.resource->adm_mutex);
2856        device = adm_ctx.device;
2857        if (test_bit(NEW_CUR_UUID, &device->flags)) {
2858                drbd_uuid_new_current(device);
2859                clear_bit(NEW_CUR_UUID, &device->flags);
2860        }
2861        drbd_suspend_io(device);
2862        retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2863        if (retcode == SS_SUCCESS) {
2864                if (device->state.conn < C_CONNECTED)
2865                        tl_clear(first_peer_device(device)->connection);
2866                if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2867                        tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2868        }
2869        drbd_resume_io(device);
2870        mutex_unlock(&adm_ctx.resource->adm_mutex);
2871out:
2872        drbd_adm_finish(&adm_ctx, info, retcode);
2873        return 0;
2874}
2875
2876int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2877{
2878        return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2879}
2880
2881static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2882                                    struct drbd_resource *resource,
2883                                    struct drbd_connection *connection,
2884                                    struct drbd_device *device)
2885{
2886        struct nlattr *nla;
2887        nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2888        if (!nla)
2889                goto nla_put_failure;
2890        if (device &&
2891            nla_put_u32(skb, T_ctx_volume, device->vnr))
2892                goto nla_put_failure;
2893        if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2894                goto nla_put_failure;
2895        if (connection) {
2896                if (connection->my_addr_len &&
2897                    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2898                        goto nla_put_failure;
2899                if (connection->peer_addr_len &&
2900                    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2901                        goto nla_put_failure;
2902        }
2903        nla_nest_end(skb, nla);
2904        return 0;
2905
2906nla_put_failure:
2907        if (nla)
2908                nla_nest_cancel(skb, nla);
2909        return -EMSGSIZE;
2910}
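/*
 * Illustrative sketch (not part of this file): on the receiving side the
 * nested context written above unpacks with standard netlink helpers.
 * The array size and the "attrs" callback environment are assumptions;
 * T_ctx_* and DRBD_NLA_CFG_CONTEXT come from drbd_genl.h, and
 * __nla_type() (as used elsewhere in this file) strips the flag bits
 * from those constants.
 *
 *   struct nlattr *ctx[128] = { };       // generous upper bound, a guess
 *   struct nlattr *nested = attrs[DRBD_NLA_CFG_CONTEXT];
 *
 *   if (nested && nla_parse_nested(ctx, 127, nested, NULL) == 0) {
 *           struct nlattr *name = ctx[__nla_type(T_ctx_resource_name)];
 *           if (name)
 *                   printf("resource: %s\n", (char *)nla_data(name));
 *   }
 */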
2911
2912/*
2913 * Return the connection of @resource if @resource has exactly one connection.
2914 */
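/* (The open-coded check below is the negation of list_is_singular().) */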
2915static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2916{
2917        struct list_head *connections = &resource->connections;
2918
2919        if (list_empty(connections) || connections->next->next != connections)
2920                return NULL;
2921        return list_first_entry(&resource->connections, struct drbd_connection, connections);
2922}
2923
2924static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2925                const struct sib_info *sib)
2926{
2927        struct drbd_resource *resource = device->resource;
2928        struct state_info *si = NULL; /* for sizeof(si->member); */
2929        struct nlattr *nla;
2930        int got_ldev;
2931        int err = 0;
2932        int exclude_sensitive;
2933
2934        /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2935         * to.  So we had better exclude sensitive information.
2936         *
2937         * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2938         * in the context of the requesting user process. Exclude sensitive
2939         * information, unless current has superuser privileges (CAP_SYS_ADMIN).
2940         *
2941         * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2942         * relies on the current implementation of netlink_dump(), which
2943         * executes the dump callback successively from netlink_recvmsg(),
2944         * always in the context of the receiving process */
2945        exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2946
2947        got_ldev = get_ldev(device);
2948
2949        /* We still need to add the connection name and volume number information.
2950         * Minor number is in drbd_genlmsghdr. */
2951        if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2952                goto nla_put_failure;
2953
2954        if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2955                goto nla_put_failure;
2956
2957        rcu_read_lock();
2958        if (got_ldev) {
2959                struct disk_conf *disk_conf;
2960
2961                disk_conf = rcu_dereference(device->ldev->disk_conf);
2962                err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2963        }
2964        if (!err) {
2965                struct net_conf *nc;
2966
2967                nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2968                if (nc)
2969                        err = net_conf_to_skb(skb, nc, exclude_sensitive);
2970        }
2971        rcu_read_unlock();
2972        if (err)
2973                goto nla_put_failure;
2974
2975        nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2976        if (!nla)
2977                goto nla_put_failure;
2978        if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2979            nla_put_u32(skb, T_current_state, device->state.i) ||
2980            nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2981            nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2982            nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2983            nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2984            nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2985            nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2986            nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2987            nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2988            nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2989            nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2990            nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2991                goto nla_put_failure;
2992
2993        if (got_ldev) {
2994                int err;
2995
2996                spin_lock_irq(&device->ldev->md.uuid_lock);
2997                err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2998                spin_unlock_irq(&device->ldev->md.uuid_lock);
2999
3000                if (err)
3001                        goto nla_put_failure;
3002
3003                if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
3004                    nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
3005                    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
3006                        goto nla_put_failure;
3007                if (C_SYNC_SOURCE <= device->state.conn &&
3008                    C_PAUSED_SYNC_T >= device->state.conn) {
3009                        if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
3010                            nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
3011                                goto nla_put_failure;
3012                }
3013        }
3014
3015        if (sib) {
3016                switch(sib->sib_reason) {
3017                case SIB_SYNC_PROGRESS:
3018                case SIB_GET_STATUS_REPLY:
3019                        break;
3020                case SIB_STATE_CHANGE:
3021                        if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3022                            nla_put_u32(skb, T_new_state, sib->ns.i))
3023                                goto nla_put_failure;
3024                        break;
3025                case SIB_HELPER_POST:
3026                        if (nla_put_u32(skb, T_helper_exit_code,
3027                                        sib->helper_exit_code))
3028                                goto nla_put_failure;
3029                        /* fall through */
3030                case SIB_HELPER_PRE:
3031                        if (nla_put_string(skb, T_helper, sib->helper_name))
3032                                goto nla_put_failure;
3033                        break;
3034                }
3035        }
3036        nla_nest_end(skb, nla);
3037
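        /* On the success path, err is still 0 and the "if (0)" below is
         * never taken; a goto nla_put_failure jumps straight to the err
         * assignment inside its body before the common put_ldev cleanup. */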
3038        if (0)
3039nla_put_failure:
3040                err = -EMSGSIZE;
3041        if (got_ldev)
3042                put_ldev(device);
3043        return err;
3044}
3045
3046int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3047{
3048        struct drbd_config_context adm_ctx;
3049        enum drbd_ret_code retcode;
3050        int err;
3051
3052        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3053        if (!adm_ctx.reply_skb)
3054                return retcode;
3055        if (retcode != NO_ERROR)
3056                goto out;
3057
3058        err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3059        if (err) {
3060                nlmsg_free(adm_ctx.reply_skb);
3061                return err;
3062        }
3063out:
3064        drbd_adm_finish(&adm_ctx, info, retcode);
3065        return 0;
3066}
3067
3068static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3069{
3070        struct drbd_device *device;
3071        struct drbd_genlmsghdr *dh;
3072        struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3073        struct drbd_resource *resource = NULL;
3074        struct drbd_resource *tmp;
3075        unsigned volume = cb->args[1];
3076
3077        /* Open-coded, deferred iteration:
3078         * for_each_resource_safe(resource, tmp, &drbd_resources) {
3079         *      connection = "first connection of resource or undefined";
3080         *      idr_for_each_entry(&resource->devices, device, i) {
3081         *        ...
3082         *      }
3083         * }
3084         * where resource is cb->args[0];
3085         * and i is cb->args[1];
3086         *
3087         * cb->args[2] indicates if we shall loop over all resources,
3088         * or just dump all volumes of a single resource.
3089         *
3090         * This may miss entries inserted after this dump started,
3091         * or entries deleted before they are reached.
3092         *
3093         * We need to make sure the device won't disappear while
3094         * we are looking at it, and revalidate our iterators
3095         * on each iteration.
3096         */
3097
3098        /* synchronize with conn_create()/drbd_destroy_connection() */
3099        rcu_read_lock();
3100        /* revalidate iterator position */
3101        for_each_resource_rcu(tmp, &drbd_resources) {
3102                if (pos == NULL) {
3103                        /* first iteration */
3104                        pos = tmp;
3105                        resource = pos;
3106                        break;
3107                }
3108                if (tmp == pos) {
3109                        resource = pos;
3110                        break;
3111                }
3112        }
3113        if (resource) {
3114next_resource:
3115                device = idr_get_next(&resource->devices, &volume);
3116                if (!device) {
3117                        /* No more volumes to dump on this resource.
3118                         * Advance resource iterator. */
3119                        pos = list_entry_rcu(resource->resources.next,
3120                                             struct drbd_resource, resources);
3121                        /* Did we dump any volume of this resource yet? */
3122                        if (volume != 0) {
3123                                /* If we reached the end of the list,
3124                                 * or only a single resource dump was requested,
3125                                 * we are done. */
3126                                if (&pos->resources == &drbd_resources || cb->args[2])
3127                                        goto out;
3128                                volume = 0;
3129                                resource = pos;
3130                                goto next_resource;
3131                        }
3132                }
3133
3134                dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3135                                cb->nlh->nlmsg_seq, &drbd_genl_family,
3136                                NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3137                if (!dh)
3138                        goto out;
3139
3140                if (!device) {
3141                        /* This is a connection without a single volume.
3142                         * Surprisingly enough, it may have a network
3143                         * configuration. */
3144                        struct drbd_connection *connection;
3145
3146                        dh->minor = -1U;
3147                        dh->ret_code = NO_ERROR;
3148                        connection = the_only_connection(resource);
3149                        if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3150                                goto cancel;
3151                        if (connection) {
3152                                struct net_conf *nc;
3153
3154                                nc = rcu_dereference(connection->net_conf);
3155                                if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3156                                        goto cancel;
3157                        }
3158                        goto done;
3159                }
3160
3161                D_ASSERT(device, device->vnr == volume);
3162                D_ASSERT(device, device->resource == resource);
3163
3164                dh->minor = device_to_minor(device);
3165                dh->ret_code = NO_ERROR;
3166
3167                if (nla_put_status_info(skb, device, NULL)) {
3168cancel:
3169                        genlmsg_cancel(skb, dh);
3170                        goto out;
3171                }
3172done:
3173                genlmsg_end(skb, dh);
3174        }
3175
3176out:
3177        rcu_read_unlock();
3178        /* where to start the next iteration */
3179        cb->args[0] = (long)pos;
3180        cb->args[1] = (pos == resource) ? volume + 1 : 0;
3181
3182        /* No more resources/volumes/minors found results in an empty skb,
3183         * which will terminate the dump. */
3184        return skb->len;
3185}
3186
3187/*
3188 * Request status of all resources, or of all volumes within a single resource.
3189 *
3190 * This is a dump, as the answer may not fit in a single reply skb otherwise,
3191 * which means we cannot use the family->attrbuf or other such members, because
3192 * dump is NOT protected by the genl_lock().  During dump, we only have access
3193 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3194 *
3195 * Once things are setup properly, we call into get_one_status().
3196 */
3197int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3198{
3199        const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3200        struct nlattr *nla;
3201        const char *resource_name;
3202        struct drbd_resource *resource;
3203        int maxtype;
3204
3205        /* Is this a followup call? */
3206        if (cb->args[0]) {
3207                /* ... of a single resource dump,
3208                 * and the resource iterator has been advanced already? */
3209                if (cb->args[2] && cb->args[2] != cb->args[0])
3210                        return 0; /* DONE. */
3211                goto dump;
3212        }
3213
3214        /* First call (from netlink_dump_start).  We need to figure out
3215         * which resource(s) the user wants us to dump. */
3216        nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3217                        nlmsg_attrlen(cb->nlh, hdrlen),
3218                        DRBD_NLA_CFG_CONTEXT);
3219
3220        /* No explicit context given.  Dump all. */
3221        if (!nla)
3222                goto dump;
3223        maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3224        nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3225        if (IS_ERR(nla))
3226                return PTR_ERR(nla);
3227        /* context given, but no name present? */
3228        if (!nla)
3229                return -EINVAL;
3230        resource_name = nla_data(nla);
3231        if (!*resource_name)
3232                return -ENODEV;
3233        resource = drbd_find_resource(resource_name);
3234        if (!resource)
3235                return -ENODEV;
3236
3237        kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3238
3239        /* prime iterators, and set "filter" mode mark:
3240         * only dump this resource. */
3241        cb->args[0] = (long)resource;
3242        /* cb->args[1] = 0; passed in this way. */
3243        cb->args[2] = (long)resource;
3244
3245dump:
3246        return get_one_status(skb, cb);
3247}
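/*
 * Illustrative userspace sketch (not part of this file): driving this
 * dump with libnl-3.  The genl family name "drbd" and struct
 * drbd_genlmsghdr come from drbd_genl.h/drbd_genl_api.h; the version
 * number is an assumption and all error handling is omitted.
 *
 *   struct nl_sock *sk = nl_socket_alloc();
 *   struct nl_msg *msg = nlmsg_alloc();
 *   struct drbd_genlmsghdr *dh;
 *   int fam;
 *
 *   genl_connect(sk);
 *   fam = genl_ctrl_resolve(sk, "drbd");
 *   dh = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, fam, sizeof(*dh),
 *                    NLM_F_DUMP, DRBD_ADM_GET_STATUS, 1);
 *   dh->minor = -1U;            // no specific minor: dump everything
 *   nl_send_auto(sk, msg);      // replies arrive via the socket's
 *   nl_recvmsgs_default(sk);    // configured valid-message callback
 */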
3248
3249int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3250{
3251        struct drbd_config_context adm_ctx;
3252        enum drbd_ret_code retcode;
3253        struct timeout_parms tp;
3254        int err;
3255
3256        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3257        if (!adm_ctx.reply_skb)
3258                return retcode;
3259        if (retcode != NO_ERROR)
3260                goto out;
3261
3262        tp.timeout_type =
3263                adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3264                test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3265                UT_DEFAULT;
3266
3267        err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3268        if (err) {
3269                nlmsg_free(adm_ctx.reply_skb);
3270                return err;
3271        }
3272out:
3273        drbd_adm_finish(&adm_ctx, info, retcode);
3274        return 0;
3275}
3276
3277int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3278{
3279        struct drbd_config_context adm_ctx;
3280        struct drbd_device *device;
3281        enum drbd_ret_code retcode;
3282        struct start_ov_parms parms;
3283
3284        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3285        if (!adm_ctx.reply_skb)
3286                return retcode;
3287        if (retcode != NO_ERROR)
3288                goto out;
3289
3290        device = adm_ctx.device;
3291
3292        /* resume from last known position, if possible */
3293        parms.ov_start_sector = device->ov_start_sector;
3294        parms.ov_stop_sector = ULLONG_MAX;
3295        if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3296                int err = start_ov_parms_from_attrs(&parms, info);
3297                if (err) {
3298                        retcode = ERR_MANDATORY_TAG;
3299                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3300                        goto out;
3301                }
3302        }
3303        mutex_lock(&adm_ctx.resource->adm_mutex);
3304
3305        /* w_make_ov_request expects position to be aligned */
3306        device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3307        device->ov_stop_sector = parms.ov_stop_sector;
3308
3309        /* If there is still bitmap IO pending, e.g. previous resync or verify
3310         * just being finished, wait for it before requesting a new resync. */
3311        drbd_suspend_io(device);
3312        wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3313        retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3314        drbd_resume_io(device);
3315
3316        mutex_unlock(&adm_ctx.resource->adm_mutex);
3317out:
3318        drbd_adm_finish(&adm_ctx, info, retcode);
3319        return 0;
3320}
3321
3322
3323int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3324{
3325        struct drbd_config_context adm_ctx;
3326        struct drbd_device *device;
3327        enum drbd_ret_code retcode;
3328        int skip_initial_sync = 0;
3329        int err;
3330        struct new_c_uuid_parms args;
3331
3332        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3333        if (!adm_ctx.reply_skb)
3334                return retcode;
3335        if (retcode != NO_ERROR)
3336                goto out_nolock;
3337
3338        device = adm_ctx.device;
3339        memset(&args, 0, sizeof(args));
3340        if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3341                err = new_c_uuid_parms_from_attrs(&args, info);
3342                if (err) {
3343                        retcode = ERR_MANDATORY_TAG;
3344                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3345                        goto out_nolock;
3346                }
3347        }
3348
3349        mutex_lock(&adm_ctx.resource->adm_mutex);
3350        mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3351
3352        if (!get_ldev(device)) {
3353                retcode = ERR_NO_DISK;
3354                goto out;
3355        }
3356
3357        /* this is "skip initial sync", assumed to be clean */
3358        if (device->state.conn == C_CONNECTED &&
3359            first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3360            device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3361                drbd_info(device, "Preparing to skip initial sync\n");
3362                skip_initial_sync = 1;
3363        } else if (device->state.conn != C_STANDALONE) {
3364                retcode = ERR_CONNECTED;
3365                goto out_dec;
3366        }
3367
3368        drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3369        drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3370
3371        if (args.clear_bm) {
3372                err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3373                        "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3374                if (err) {
3375                        drbd_err(device, "Writing bitmap failed with %d\n", err);
3376                        retcode = ERR_IO_MD_DISK;
3377                }
3378                if (skip_initial_sync) {
3379                        drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3380                        _drbd_uuid_set(device, UI_BITMAP, 0);
3381                        drbd_print_uuids(device, "cleared bitmap UUID");
3382                        spin_lock_irq(&device->resource->req_lock);
3383                        _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3384                                        CS_VERBOSE, NULL);
3385                        spin_unlock_irq(&device->resource->req_lock);
3386                }
3387        }
3388
3389        drbd_md_sync(device);
3390out_dec:
3391        put_ldev(device);
3392out:
3393        mutex_unlock(device->state_mutex);
3394        mutex_unlock(&adm_ctx.resource->adm_mutex);
3395out_nolock:
3396        drbd_adm_finish(&adm_ctx, info, retcode);
3397        return 0;
3398}
3399
3400static enum drbd_ret_code
3401drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3402{
3403        const char *name = adm_ctx->resource_name;
3404        if (!name || !name[0]) {
3405                drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3406                return ERR_MANDATORY_TAG;
3407        }
3408        /* if we want to use these in sysfs/configfs/debugfs some day,
3409         * we must not allow slashes */
3410        if (strchr(name, '/')) {
3411                drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3412                return ERR_INVALID_REQUEST;
3413        }
3414        return NO_ERROR;
3415}
3416
3417int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3418{
3419        struct drbd_config_context adm_ctx;
3420        enum drbd_ret_code retcode;
3421        struct res_opts res_opts;
3422        int err;
3423
3424        retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3425        if (!adm_ctx.reply_skb)
3426                return retcode;
3427        if (retcode != NO_ERROR)
3428                goto out;
3429
3430        set_res_opts_defaults(&res_opts);
3431        err = res_opts_from_attrs(&res_opts, info);
3432        if (err && err != -ENOMSG) {
3433                retcode = ERR_MANDATORY_TAG;
3434                drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3435                goto out;
3436        }
3437
3438        retcode = drbd_check_resource_name(&adm_ctx);
3439        if (retcode != NO_ERROR)
3440                goto out;
3441
3442        if (adm_ctx.resource) {
3443                if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3444                        retcode = ERR_INVALID_REQUEST;
3445                        drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3446                }
3447                /* else: still NO_ERROR */
3448                goto out;
3449        }
3450
3451        /* not yet safe for genl_family.parallel_ops */
3452        if (!conn_create(adm_ctx.resource_name, &res_opts))
3453                retcode = ERR_NOMEM;
3454out:
3455        drbd_adm_finish(&adm_ctx, info, retcode);
3456        return 0;
3457}
3458
3459int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3460{
3461        struct drbd_config_context adm_ctx;
3462        struct drbd_genlmsghdr *dh = info->userhdr;
3463        enum drbd_ret_code retcode;
3464
3465        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3466        if (!adm_ctx.reply_skb)
3467                return retcode;
3468        if (retcode != NO_ERROR)
3469                goto out;
3470
3471        if (dh->minor > MINORMASK) {
3472                drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3473                retcode = ERR_INVALID_REQUEST;
3474                goto out;
3475        }
3476        if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3477                drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3478                retcode = ERR_INVALID_REQUEST;
3479                goto out;
3480        }
3481
3482        /* drbd_adm_prepare made sure already
3483         * that first_peer_device(device)->connection and device->vnr match the request. */
3484        if (adm_ctx.device) {
3485                if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3486                        retcode = ERR_MINOR_EXISTS;
3487                /* else: still NO_ERROR */
3488                goto out;
3489        }
3490
3491        mutex_lock(&adm_ctx.resource->adm_mutex);
3492        retcode = drbd_create_device(&adm_ctx, dh->minor);
3493        mutex_unlock(&adm_ctx.resource->adm_mutex);
3494out:
3495        drbd_adm_finish(&adm_ctx, info, retcode);
3496        return 0;
3497}
3498
3499static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3500{
3501        if (device->state.disk == D_DISKLESS &&
3502            /* no need to require device->state.conn == C_STANDALONE here;
3503             * we may want to delete a minor from a live replication group.
3504             */
3505            device->state.role == R_SECONDARY) {
3506                _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3507                                    CS_VERBOSE + CS_WAIT_COMPLETE);
3508                drbd_delete_device(device);
3509                return NO_ERROR;
3510        } else
3511                return ERR_MINOR_CONFIGURED;
3512}
3513
3514int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3515{
3516        struct drbd_config_context adm_ctx;
3517        enum drbd_ret_code retcode;
3518
3519        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3520        if (!adm_ctx.reply_skb)
3521                return retcode;
3522        if (retcode != NO_ERROR)
3523                goto out;
3524
3525        mutex_lock(&adm_ctx.resource->adm_mutex);
3526        retcode = adm_del_minor(adm_ctx.device);
3527        mutex_unlock(&adm_ctx.resource->adm_mutex);
3528out:
3529        drbd_adm_finish(&adm_ctx, info, retcode);
3530        return 0;
3531}
3532
3533int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3534{
3535        struct drbd_config_context adm_ctx;
3536        struct drbd_resource *resource;
3537        struct drbd_connection *connection;
3538        struct drbd_device *device;
3539        int retcode; /* enum drbd_ret_code or enum drbd_state_rv */
3540        unsigned i;
3541
3542        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3543        if (!adm_ctx.reply_skb)
3544                return retcode;
3545        if (retcode != NO_ERROR)
3546                goto finish;
3547
3548        resource = adm_ctx.resource;
3549        mutex_lock(&resource->adm_mutex);
3550        /* demote */
3551        for_each_connection(connection, resource) {
3552                struct drbd_peer_device *peer_device;
3553
3554                idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3555                        retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3556                        if (retcode < SS_SUCCESS) {
3557                                drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3558                                goto out;
3559                        }
3560                }
3561
3562                retcode = conn_try_disconnect(connection, 0);
3563                if (retcode < SS_SUCCESS) {
3564                        drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3565                        goto out;
3566                }
3567        }
3568
3569        /* detach */
3570        idr_for_each_entry(&resource->devices, device, i) {
3571                retcode = adm_detach(device, 0);
3572                if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3573                        drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3574                        goto out;
3575                }
3576        }
3577
3578        /* If we reach this, all volumes (of this connection) are Secondary,
3579         * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3580         * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3581        for_each_connection(connection, resource)
3582                drbd_thread_stop(&connection->worker);
3583
3584        /* Now, nothing can fail anymore */
3585
3586        /* delete volumes */
3587        idr_for_each_entry(&resource->devices, device, i) {
3588                retcode = adm_del_minor(device);
3589                if (retcode != NO_ERROR) {
3590                        /* "cannot happen" */
3591                        drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3592                        goto out;
3593                }
3594        }
3595
3596        list_del_rcu(&resource->resources);
3597        synchronize_rcu();
3598        drbd_free_resource(resource);
3599        retcode = NO_ERROR;
3600out:
3601        mutex_unlock(&resource->adm_mutex);
3602finish:
3603        drbd_adm_finish(&adm_ctx, info, retcode);
3604        return 0;
3605}
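/*
 * This handler backs the "drbdsetup down" command (illustrative mapping;
 * the actual CLI wiring lives in the userspace drbd-utils package): it
 * demotes, disconnects, detaches and deletes everything belonging to one
 * resource in a single netlink request.
 */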
3606
3607int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3608{
3609        struct drbd_config_context adm_ctx;
3610        struct drbd_resource *resource;
3611        struct drbd_connection *connection;
3612        enum drbd_ret_code retcode;
3613
3614        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3615        if (!adm_ctx.reply_skb)
3616                return retcode;
3617        if (retcode != NO_ERROR)
3618                goto finish;
3619
3620        resource = adm_ctx.resource;
3621        mutex_lock(&resource->adm_mutex);
3622        for_each_connection(connection, resource) {
3623                if (connection->cstate > C_STANDALONE) {
3624                        retcode = ERR_NET_CONFIGURED;
3625                        goto out;
3626                }
3627        }
3628        if (!idr_is_empty(&resource->devices)) {
3629                retcode = ERR_RES_IN_USE;
3630                goto out;
3631        }
3632
3633        list_del_rcu(&resource->resources);
3634        for_each_connection(connection, resource)
3635                drbd_thread_stop(&connection->worker);
3636        synchronize_rcu();
3637        drbd_free_resource(resource);
3638        retcode = NO_ERROR;
3639out:
3640        mutex_unlock(&resource->adm_mutex);
3641finish:
3642        drbd_adm_finish(&adm_ctx, info, retcode);
3643        return 0;
3644}
3645
3646void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3647{
3648        static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3649        struct sk_buff *msg;
3650        struct drbd_genlmsghdr *d_out;
3651        unsigned seq;
3652        int err = -ENOMEM;
3653
3654        seq = atomic_inc_return(&drbd_genl_seq);
3655        msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3656        if (!msg)
3657                goto failed;
3658
3659        err = -EMSGSIZE;
3660        d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3661        if (!d_out) /* cannot happen, but check anyway. */
3662                goto nla_put_failure;
3663        d_out->minor = device_to_minor(device);
3664        d_out->ret_code = NO_ERROR;
3665
3666        if (nla_put_status_info(msg, device, sib))
3667                goto nla_put_failure;
3668        genlmsg_end(msg, d_out);
3669        err = drbd_genl_multicast_events(msg, 0);
3670        /* msg has been consumed or freed in netlink_broadcast() */
3671        if (err && err != -ESRCH)
3672                goto failed;
3673
3674        return;
3675
3676nla_put_failure:
3677        nlmsg_free(msg);
3678failed:
3679        drbd_err(device, "Error %d while broadcasting event. "
3680                        "Event seq:%u sib_reason:%u\n",
3681                        err, seq, sib->sib_reason);
3682}
3683
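/*
 * Illustrative userspace sketch (not part of this file): receiving these
 * broadcasts with libnl-3.  The multicast group name "events" matches the
 * GENL_mc_group(events) declaration in drbd_genl.h; on_event() is a
 * hypothetical callback and error handling is omitted.
 *
 *   struct nl_sock *sk = nl_socket_alloc();
 *   int grp;
 *
 *   genl_connect(sk);
 *   grp = genl_ctrl_resolve_grp(sk, "drbd", "events");
 *   nl_socket_add_membership(sk, grp);
 *   nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_event, NULL);
 *   for (;;)
 *           nl_recvmsgs_default(sk);   // on_event() sees each sib broadcast
 */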