linux/fs/afs/rotate.c
<<
>>
Prefs
   1/* Handle fileserver selection and rotation.
   2 *
   3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
   4 * Written by David Howells (dhowells@redhat.com)
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public Licence
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the Licence, or (at your option) any later version.
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/slab.h>
  14#include <linux/fs.h>
  15#include <linux/sched.h>
  16#include <linux/delay.h>
  17#include <linux/sched/signal.h>
  18#include "internal.h"
  19#include "afs_fs.h"
  20
  21/*
  22 * Initialise a filesystem server cursor for iterating over FS servers.
  23 */
  24static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
  25{
  26        memset(fc, 0, sizeof(*fc));
  27}
  28
  29/*
  30 * Begin an operation on the fileserver.
  31 *
  32 * Fileserver operations are serialised on the server by vnode, so we serialise
  33 * them here also using the io_lock.
  34 */
  35bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
  36                               struct key *key)
  37{
  38        afs_init_fs_cursor(fc, vnode);
  39        fc->vnode = vnode;
  40        fc->key = key;
  41        fc->ac.error = SHRT_MAX;
  42
  43        if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
  44                fc->ac.error = -EINTR;
  45                fc->flags |= AFS_FS_CURSOR_STOP;
  46                return false;
  47        }
  48
  49        if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
  50                fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
  51        return true;
  52}
  53
  54/*
  55 * Begin iteration through a server list, starting with the vnode's last used
  56 * server if possible, or the last recorded good server if not.
  57 */
  58static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
  59                                   struct afs_vnode *vnode)
  60{
  61        struct afs_cb_interest *cbi;
  62        int i;
  63
  64        read_lock(&vnode->volume->servers_lock);
  65        fc->server_list = afs_get_serverlist(vnode->volume->servers);
  66        read_unlock(&vnode->volume->servers_lock);
  67
  68        cbi = vnode->cb_interest;
  69        if (cbi) {
  70                /* See if the vnode's preferred record is still available */
  71                for (i = 0; i < fc->server_list->nr_servers; i++) {
  72                        if (fc->server_list->servers[i].cb_interest == cbi) {
  73                                fc->start = i;
  74                                goto found_interest;
  75                        }
  76                }
  77
  78                /* If we have a lock outstanding on a server that's no longer
  79                 * serving this vnode, then we can't switch to another server
  80                 * and have to return an error.
  81                 */
  82                if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
  83                        fc->ac.error = -ESTALE;
  84                        return false;
  85                }
  86
  87                /* Note that the callback promise is effectively broken */
  88                write_seqlock(&vnode->cb_lock);
  89                ASSERTCMP(cbi, ==, vnode->cb_interest);
  90                vnode->cb_interest = NULL;
  91                if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
  92                        vnode->cb_break++;
  93                write_sequnlock(&vnode->cb_lock);
  94
  95                afs_put_cb_interest(afs_v2net(vnode), cbi);
  96                cbi = NULL;
  97        } else {
  98                fc->start = READ_ONCE(fc->server_list->index);
  99        }
 100
 101found_interest:
 102        fc->index = fc->start;
 103        return true;
 104}
 105
 106/*
 107 * Post volume busy note.
 108 */
 109static void afs_busy(struct afs_volume *volume, u32 abort_code)
 110{
 111        const char *m;
 112
 113        switch (abort_code) {
 114        case VOFFLINE:          m = "offline";          break;
 115        case VRESTARTING:       m = "restarting";       break;
 116        case VSALVAGING:        m = "being salvaged";   break;
 117        default:                m = "busy";             break;
 118        }
 119
 120        pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
 121}
 122
 123/*
 124 * Sleep and retry the operation to the same fileserver.
 125 */
 126static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
 127{
 128        msleep_interruptible(1000);
 129        if (signal_pending(current)) {
 130                fc->ac.error = -ERESTARTSYS;
 131                return false;
 132        }
 133
 134        return true;
 135}
 136
 137/*
 138 * Select the fileserver to use.  May be called multiple times to rotate
 139 * through the fileservers.
 140 */
 141bool afs_select_fileserver(struct afs_fs_cursor *fc)
 142{
 143        struct afs_addr_list *alist;
 144        struct afs_server *server;
 145        struct afs_vnode *vnode = fc->vnode;
 146
 147        _enter("%u/%u,%u/%u,%d,%d",
 148               fc->index, fc->start,
 149               fc->ac.index, fc->ac.start,
 150               fc->ac.error, fc->ac.abort_code);
 151
 152        if (fc->flags & AFS_FS_CURSOR_STOP) {
 153                _leave(" = f [stopped]");
 154                return false;
 155        }
 156
 157        /* Evaluate the result of the previous operation, if there was one. */
 158        switch (fc->ac.error) {
 159        case SHRT_MAX:
 160                goto start;
 161
 162        case 0:
 163        default:
 164                /* Success or local failure.  Stop. */
 165                fc->flags |= AFS_FS_CURSOR_STOP;
 166                _leave(" = f [okay/local %d]", fc->ac.error);
 167                return false;
 168
 169        case -ECONNABORTED:
 170                /* The far side rejected the operation on some grounds.  This
 171                 * might involve the server being busy or the volume having been moved.
 172                 */
 173                switch (fc->ac.abort_code) {
 174                case VNOVOL:
 175                        /* This fileserver doesn't know about the volume.
 176                         * - May indicate that the VL is wrong - retry once and compare
 177                         *   the results.
 178                         * - May indicate that the fileserver couldn't attach to the vol.
 179                         */
 180                        if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
 181                                fc->ac.error = -EREMOTEIO;
 182                                goto next_server;
 183                        }
 184
 185                        write_lock(&vnode->volume->servers_lock);
 186                        fc->server_list->vnovol_mask |= 1 << fc->index;
 187                        write_unlock(&vnode->volume->servers_lock);
 188
 189                        set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
 190                        fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
 191                        if (fc->ac.error < 0)
 192                                goto failed;
 193
 194                        if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
 195                                fc->ac.error = -ENOMEDIUM;
 196                                goto failed;
 197                        }
 198
 199                        /* If the server list didn't change, then assume that
 200                         * it's the fileserver having trouble.
 201                         */
 202                        if (vnode->volume->servers == fc->server_list) {
 203                                fc->ac.error = -EREMOTEIO;
 204                                goto next_server;
 205                        }
 206
 207                        /* Try again */
 208                        fc->flags |= AFS_FS_CURSOR_VNOVOL;
 209                        _leave(" = t [vnovol]");
 210                        return true;
 211
 212                case VSALVAGE: /* TODO: Should this return an error or iterate? */
 213                case VVOLEXISTS:
 214                case VNOSERVICE:
 215                case VONLINE:
 216                case VDISKFULL:
 217                case VOVERQUOTA:
 218                        fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
 219                        goto next_server;
 220
 221                case VOFFLINE:
 222                        if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
 223                                afs_busy(vnode->volume, fc->ac.abort_code);
 224                                clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
 225                        }
 226                        if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
 227                                fc->ac.error = -EADV;
 228                                goto failed;
 229                        }
 230                        if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
 231                                fc->ac.error = -ESTALE;
 232                                goto failed;
 233                        }
 234                        goto busy;
 235
 236                case VSALVAGING:
 237                case VRESTARTING:
 238                case VBUSY:
 239                        /* Retry after going round all the servers unless we
 240                         * have a file lock we need to maintain.
 241                         */
 242                        if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
 243                                fc->ac.error = -EBUSY;
 244                                goto failed;
 245                        }
 246                        if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
 247                                afs_busy(vnode->volume, fc->ac.abort_code);
 248                                clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
 249                        }
 250                busy:
 251                        if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
 252                                if (!afs_sleep_and_retry(fc))
 253                                        goto failed;
 254
 255                                 /* Retry with same server & address */
 256                                _leave(" = t [vbusy]");
 257                                return true;
 258                        }
 259
 260                        fc->flags |= AFS_FS_CURSOR_VBUSY;
 261                        goto next_server;
 262
 263                case VMOVED:
 264                        /* The volume migrated to another server.  We consider
 265                         * consider all locks and callbacks broken and request
 266                         * an update from the VLDB.
 267                         *
 268                         * We also limit the number of VMOVED hops we will
 269                         * honour, just in case someone sets up a loop.
 270                         */
 271                        if (fc->flags & AFS_FS_CURSOR_VMOVED) {
 272                                fc->ac.error = -EREMOTEIO;
 273                                goto failed;
 274                        }
 275                        fc->flags |= AFS_FS_CURSOR_VMOVED;
 276
 277                        set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
 278                        set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
 279                        fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
 280                        if (fc->ac.error < 0)
 281                                goto failed;
 282
 283                        /* If the server list didn't change, then the VLDB is
 284                         * out of sync with the fileservers.  This is hopefully
 285                         * a temporary condition, however, so we don't want to
 286                         * permanently block access to the file.
 287                         *
 288                         * TODO: Try other fileservers if we can.
 289                         *
 290                         * TODO: Retry a few times with sleeps.
 291                         */
 292                        if (vnode->volume->servers == fc->server_list) {
 293                                fc->ac.error = -ENOMEDIUM;
 294                                goto failed;
 295                        }
 296
 297                        goto restart_from_beginning;
 298
 299                default:
 300                        clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
 301                        clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
 302                        fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
 303                        goto failed;
 304                }
 305
 306        case -ENETUNREACH:
 307        case -EHOSTUNREACH:
 308        case -ECONNREFUSED:
 309        case -ETIMEDOUT:
 310        case -ETIME:
 311                _debug("no conn");
 312                goto iterate_address;
 313
 314        case -ECONNRESET:
 315                _debug("call reset");
 316                goto failed;
 317        }
 318
 319restart_from_beginning:
 320        _debug("restart");
 321        afs_end_cursor(&fc->ac);
 322        afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
 323        fc->cbi = NULL;
 324        afs_put_serverlist(afs_v2net(vnode), fc->server_list);
 325        fc->server_list = NULL;
 326start:
 327        _debug("start");
 328        /* See if we need to do an update of the volume record.  Note that the
 329         * volume may have moved or even have been deleted.
 330         */
 331        fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
 332        if (fc->ac.error < 0)
 333                goto failed;
 334
 335        if (!afs_start_fs_iteration(fc, vnode))
 336                goto failed;
 337
 338use_server:
 339        _debug("use");
 340        /* We're starting on a different fileserver from the list.  We need to
 341         * check it, create a callback intercept, find its address list and
 342         * probe its capabilities before we use it.
 343         */
 344        ASSERTCMP(fc->ac.alist, ==, NULL);
 345        server = fc->server_list->servers[fc->index].server;
 346
 347        if (!afs_check_server_record(fc, server))
 348                goto failed;
 349
 350        _debug("USING SERVER: %pU", &server->uuid);
 351
 352        /* Make sure we've got a callback interest record for this server.  We
 353         * have to link it in before we send the request as we can be sent a
 354         * break request before we've finished decoding the reply and
 355         * installing the vnode.
 356         */
 357        fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
 358                                                       fc->index);
 359        if (fc->ac.error < 0)
 360                goto failed;
 361
 362        fc->cbi = afs_get_cb_interest(vnode->cb_interest);
 363
 364        read_lock(&server->fs_lock);
 365        alist = rcu_dereference_protected(server->addresses,
 366                                          lockdep_is_held(&server->fs_lock));
 367        afs_get_addrlist(alist);
 368        read_unlock(&server->fs_lock);
 369
 370        memset(&fc->ac, 0, sizeof(fc->ac));
 371
 372        /* Probe the current fileserver if we haven't done so yet. */
 373        if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
 374                fc->ac.alist = afs_get_addrlist(alist);
 375
 376                if (!afs_probe_fileserver(fc)) {
 377                        switch (fc->ac.error) {
 378                        case -ENOMEM:
 379                        case -ERESTARTSYS:
 380                        case -EINTR:
 381                                goto failed;
 382                        default:
 383                                goto next_server;
 384                        }
 385                }
 386        }
 387
 388        if (!fc->ac.alist)
 389                fc->ac.alist = alist;
 390        else
 391                afs_put_addrlist(alist);
 392
 393        fc->ac.start = READ_ONCE(alist->index);
 394        fc->ac.index = fc->ac.start;
 395
 396iterate_address:
 397        ASSERT(fc->ac.alist);
 398        _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
 399        /* Iterate over the current server's address list to try and find an
 400         * address on which it will respond to us.
 401         */
 402        if (!afs_iterate_addresses(&fc->ac))
 403                goto next_server;
 404
 405        _leave(" = t");
 406        return true;
 407
 408next_server:
 409        _debug("next");
 410        afs_end_cursor(&fc->ac);
 411        afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
 412        fc->cbi = NULL;
 413        fc->index++;
 414        if (fc->index >= fc->server_list->nr_servers)
 415                fc->index = 0;
 416        if (fc->index != fc->start)
 417                goto use_server;
 418
 419        /* That's all the servers poked to no good effect.  Try again if some
 420         * of them were busy.
 421         */
 422        if (fc->flags & AFS_FS_CURSOR_VBUSY)
 423                goto restart_from_beginning;
 424
 425        fc->ac.error = -EDESTADDRREQ;
 426        goto failed;
 427
 428failed:
 429        fc->flags |= AFS_FS_CURSOR_STOP;
 430        afs_end_cursor(&fc->ac);
 431        _leave(" = f [failed %d]", fc->ac.error);
 432        return false;
 433}
 434
 435/*
 436 * Select the same fileserver we used for a vnode before and only that
 437 * fileserver.  We use this when we have a lock on that file, which is backed
 438 * only by the fileserver we obtained it from.
 439 */
 440bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 441{
 442        struct afs_vnode *vnode = fc->vnode;
 443        struct afs_cb_interest *cbi = vnode->cb_interest;
 444        struct afs_addr_list *alist;
 445
 446        _enter("");
 447
 448        switch (fc->ac.error) {
 449        case SHRT_MAX:
 450                if (!cbi) {
 451                        fc->ac.error = -ESTALE;
 452                        fc->flags |= AFS_FS_CURSOR_STOP;
 453                        return false;
 454                }
 455
 456                fc->cbi = afs_get_cb_interest(vnode->cb_interest);
 457
 458                read_lock(&cbi->server->fs_lock);
 459                alist = rcu_dereference_protected(cbi->server->addresses,
 460                                                  lockdep_is_held(&cbi->server->fs_lock));
 461                afs_get_addrlist(alist);
 462                read_unlock(&cbi->server->fs_lock);
 463                if (!alist) {
 464                        fc->ac.error = -ESTALE;
 465                        fc->flags |= AFS_FS_CURSOR_STOP;
 466                        return false;
 467                }
 468
 469                memset(&fc->ac, 0, sizeof(fc->ac));
 470                fc->ac.alist = alist;
 471                fc->ac.start = READ_ONCE(alist->index);
 472                fc->ac.index = fc->ac.start;
 473                goto iterate_address;
 474
 475        case 0:
 476        default:
 477                /* Success or local failure.  Stop. */
 478                fc->flags |= AFS_FS_CURSOR_STOP;
 479                _leave(" = f [okay/local %d]", fc->ac.error);
 480                return false;
 481
 482        case -ECONNABORTED:
 483                fc->flags |= AFS_FS_CURSOR_STOP;
 484                _leave(" = f [abort]");
 485                return false;
 486
 487        case -ENETUNREACH:
 488        case -EHOSTUNREACH:
 489        case -ECONNREFUSED:
 490        case -ETIMEDOUT:
 491        case -ETIME:
 492                _debug("no conn");
 493                goto iterate_address;
 494        }
 495
 496iterate_address:
 497        /* Iterate over the current server's address list to try and find an
 498         * address on which it will respond to us.
 499         */
 500        if (afs_iterate_addresses(&fc->ac)) {
 501                _leave(" = t");
 502                return true;
 503        }
 504
 505        afs_end_cursor(&fc->ac);
 506        return false;
 507}
 508
 509/*
 510 * Tidy up a filesystem cursor and unlock the vnode.
 511 */
 512int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 513{
 514        struct afs_net *net = afs_v2net(fc->vnode);
 515        int ret;
 516
 517        mutex_unlock(&fc->vnode->io_lock);
 518
 519        afs_end_cursor(&fc->ac);
 520        afs_put_cb_interest(net, fc->cbi);
 521        afs_put_serverlist(net, fc->server_list);
 522
 523        ret = fc->ac.error;
 524        if (ret == -ECONNABORTED)
 525                afs_abort_to_error(fc->ac.abort_code);
 526
 527        return fc->ac.error;
 528}
 529