linux/fs/afs/volume.c
<<
>>
Prefs
   1/* AFS volume management
   2 *
   3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
   4 * Written by David Howells (dhowells@redhat.com)
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/module.h>
  14#include <linux/init.h>
  15#include <linux/slab.h>
  16#include <linux/fs.h>
  17#include <linux/pagemap.h>
  18#include <linux/sched.h>
  19#include "internal.h"
  20
  21static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  22
  23/*
  24 * lookup a volume by name
  25 * - this can be one of the following:
  26 *      "%[cell:]volume[.]"             R/W volume
  27 *      "#[cell:]volume[.]"             R/O or R/W volume (rwparent=0),
  28 *                                       or R/W (rwparent=1) volume
  29 *      "%[cell:]volume.readonly"       R/O volume
  30 *      "#[cell:]volume.readonly"       R/O volume
  31 *      "%[cell:]volume.backup"         Backup volume
  32 *      "#[cell:]volume.backup"         Backup volume
  33 *
  34 * The cell name is optional, and defaults to the current cell.
  35 *
  36 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  37 * Guide
  38 * - Rule 1: Explicit type suffix forces access of that type or nothing
  39 *           (no suffix, then use Rule 2 & 3)
  40 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  41 *           if not available
  42 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  43 *           explicitly told otherwise
  44 */
  45struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
  46{
  47        struct afs_vlocation *vlocation = NULL;
  48        struct afs_volume *volume = NULL;
  49        struct afs_server *server = NULL;
  50        char srvtmask;
  51        int ret, loop;
  52
  53        _enter("{%*.*s,%d}",
  54               params->volnamesz, params->volnamesz, params->volname, params->rwpath);
  55
  56        /* lookup the volume location record */
  57        vlocation = afs_vlocation_lookup(params->cell, params->key,
  58                                         params->volname, params->volnamesz);
  59        if (IS_ERR(vlocation)) {
  60                ret = PTR_ERR(vlocation);
  61                vlocation = NULL;
  62                goto error;
  63        }
  64
  65        /* make the final decision on the type we want */
  66        ret = -ENOMEDIUM;
  67        if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
  68                goto error;
  69
  70        srvtmask = 0;
  71        for (loop = 0; loop < vlocation->vldb.nservers; loop++)
  72                srvtmask |= vlocation->vldb.srvtmask[loop];
  73
  74        if (params->force) {
  75                if (!(srvtmask & (1 << params->type)))
  76                        goto error;
  77        } else if (srvtmask & AFS_VOL_VTM_RO) {
  78                params->type = AFSVL_ROVOL;
  79        } else if (srvtmask & AFS_VOL_VTM_RW) {
  80                params->type = AFSVL_RWVOL;
  81        } else {
  82                goto error;
  83        }
  84
  85        down_write(&params->cell->vl_sem);
  86
  87        /* is the volume already active? */
  88        if (vlocation->vols[params->type]) {
  89                /* yes - re-use it */
  90                volume = vlocation->vols[params->type];
  91                afs_get_volume(volume);
  92                goto success;
  93        }
  94
  95        /* create a new volume record */
  96        _debug("creating new volume record");
  97
  98        ret = -ENOMEM;
  99        volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
 100        if (!volume)
 101                goto error_up;
 102
 103        atomic_set(&volume->usage, 1);
 104        volume->type            = params->type;
 105        volume->type_force      = params->force;
 106        volume->cell            = params->cell;
 107        volume->vid             = vlocation->vldb.vid[params->type];
 108
 109        ret = bdi_setup_and_register(&volume->bdi, "afs");
 110        if (ret)
 111                goto error_bdi;
 112
 113        init_rwsem(&volume->server_sem);
 114
 115        /* look up all the applicable server records */
 116        for (loop = 0; loop < 8; loop++) {
 117                if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
 118                        server = afs_lookup_server(
 119                               volume->cell, &vlocation->vldb.servers[loop]);
 120                        if (IS_ERR(server)) {
 121                                ret = PTR_ERR(server);
 122                                goto error_discard;
 123                        }
 124
 125                        volume->servers[volume->nservers] = server;
 126                        volume->nservers++;
 127                }
 128        }
 129
 130        /* attach the cache and volume location */
 131#ifdef CONFIG_AFS_FSCACHE
 132        volume->cache = fscache_acquire_cookie(vlocation->cache,
 133                                               &afs_volume_cache_index_def,
 134                                               volume, true);
 135#endif
 136        afs_get_vlocation(vlocation);
 137        volume->vlocation = vlocation;
 138
 139        vlocation->vols[volume->type] = volume;
 140
 141success:
 142        _debug("kAFS selected %s volume %08x",
 143               afs_voltypes[volume->type], volume->vid);
 144        up_write(&params->cell->vl_sem);
 145        afs_put_vlocation(vlocation);
 146        _leave(" = %p", volume);
 147        return volume;
 148
 149        /* clean up */
 150error_up:
 151        up_write(&params->cell->vl_sem);
 152error:
 153        afs_put_vlocation(vlocation);
 154        _leave(" = %d", ret);
 155        return ERR_PTR(ret);
 156
 157error_discard:
 158        bdi_destroy(&volume->bdi);
 159error_bdi:
 160        up_write(&params->cell->vl_sem);
 161
 162        for (loop = volume->nservers - 1; loop >= 0; loop--)
 163                afs_put_server(volume->servers[loop]);
 164
 165        kfree(volume);
 166        goto error;
 167}
 168
 169/*
 170 * destroy a volume record
 171 */
 172void afs_put_volume(struct afs_volume *volume)
 173{
 174        struct afs_vlocation *vlocation;
 175        int loop;
 176
 177        if (!volume)
 178                return;
 179
 180        _enter("%p", volume);
 181
 182        ASSERTCMP(atomic_read(&volume->usage), >, 0);
 183
 184        vlocation = volume->vlocation;
 185
 186        /* to prevent a race, the decrement and the dequeue must be effectively
 187         * atomic */
 188        down_write(&vlocation->cell->vl_sem);
 189
 190        if (likely(!atomic_dec_and_test(&volume->usage))) {
 191                up_write(&vlocation->cell->vl_sem);
 192                _leave("");
 193                return;
 194        }
 195
 196        vlocation->vols[volume->type] = NULL;
 197
 198        up_write(&vlocation->cell->vl_sem);
 199
 200        /* finish cleaning up the volume */
 201#ifdef CONFIG_AFS_FSCACHE
 202        fscache_relinquish_cookie(volume->cache, 0);
 203#endif
 204        afs_put_vlocation(vlocation);
 205
 206        for (loop = volume->nservers - 1; loop >= 0; loop--)
 207                afs_put_server(volume->servers[loop]);
 208
 209        bdi_destroy(&volume->bdi);
 210        kfree(volume);
 211
 212        _leave(" [destroyed]");
 213}
 214
 215/*
 216 * pick a server to use to try accessing this volume
 217 * - returns with an elevated usage count on the server chosen
 218 */
 219struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 220{
 221        struct afs_volume *volume = vnode->volume;
 222        struct afs_server *server;
 223        int ret, state, loop;
 224
 225        _enter("%s", volume->vlocation->vldb.name);
 226
 227        /* stick with the server we're already using if we can */
 228        if (vnode->server && vnode->server->fs_state == 0) {
 229                afs_get_server(vnode->server);
 230                _leave(" = %p [current]", vnode->server);
 231                return vnode->server;
 232        }
 233
 234        down_read(&volume->server_sem);
 235
 236        /* handle the no-server case */
 237        if (volume->nservers == 0) {
 238                ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
 239                up_read(&volume->server_sem);
 240                _leave(" = %d [no servers]", ret);
 241                return ERR_PTR(ret);
 242        }
 243
 244        /* basically, just search the list for the first live server and use
 245         * that */
 246        ret = 0;
 247        for (loop = 0; loop < volume->nservers; loop++) {
 248                server = volume->servers[loop];
 249                state = server->fs_state;
 250
 251                _debug("consider %d [%d]", loop, state);
 252
 253                switch (state) {
 254                        /* found an apparently healthy server */
 255                case 0:
 256                        afs_get_server(server);
 257                        up_read(&volume->server_sem);
 258                        _leave(" = %p (picked %08x)",
 259                               server, ntohl(server->addr.s_addr));
 260                        return server;
 261
 262                case -ENETUNREACH:
 263                        if (ret == 0)
 264                                ret = state;
 265                        break;
 266
 267                case -EHOSTUNREACH:
 268                        if (ret == 0 ||
 269                            ret == -ENETUNREACH)
 270                                ret = state;
 271                        break;
 272
 273                case -ECONNREFUSED:
 274                        if (ret == 0 ||
 275                            ret == -ENETUNREACH ||
 276                            ret == -EHOSTUNREACH)
 277                                ret = state;
 278                        break;
 279
 280                default:
 281                case -EREMOTEIO:
 282                        if (ret == 0 ||
 283                            ret == -ENETUNREACH ||
 284                            ret == -EHOSTUNREACH ||
 285                            ret == -ECONNREFUSED)
 286                                ret = state;
 287                        break;
 288                }
 289        }
 290
 291        /* no available servers
 292         * - TODO: handle the no active servers case better
 293         */
 294        up_read(&volume->server_sem);
 295        _leave(" = %d", ret);
 296        return ERR_PTR(ret);
 297}
 298
 299/*
 300 * release a server after use
 301 * - releases the ref on the server struct that was acquired by picking
 302 * - records result of using a particular server to access a volume
 303 * - return 0 to try again, 1 if okay or to issue error
 304 * - the caller must release the server struct if result was 0
 305 */
 306int afs_volume_release_fileserver(struct afs_vnode *vnode,
 307                                  struct afs_server *server,
 308                                  int result)
 309{
 310        struct afs_volume *volume = vnode->volume;
 311        unsigned loop;
 312
 313        _enter("%s,%08x,%d",
 314               volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
 315               result);
 316
 317        switch (result) {
 318                /* success */
 319        case 0:
 320                server->fs_act_jif = jiffies;
 321                server->fs_state = 0;
 322                _leave("");
 323                return 1;
 324
 325                /* the fileserver denied all knowledge of the volume */
 326        case -ENOMEDIUM:
 327                server->fs_act_jif = jiffies;
 328                down_write(&volume->server_sem);
 329
 330                /* firstly, find where the server is in the active list (if it
 331                 * is) */
 332                for (loop = 0; loop < volume->nservers; loop++)
 333                        if (volume->servers[loop] == server)
 334                                goto present;
 335
 336                /* no longer there - may have been discarded by another op */
 337                goto try_next_server_upw;
 338
 339        present:
 340                volume->nservers--;
 341                memmove(&volume->servers[loop],
 342                        &volume->servers[loop + 1],
 343                        sizeof(volume->servers[loop]) *
 344                        (volume->nservers - loop));
 345                volume->servers[volume->nservers] = NULL;
 346                afs_put_server(server);
 347                volume->rjservers++;
 348
 349                if (volume->nservers > 0)
 350                        /* another server might acknowledge its existence */
 351                        goto try_next_server_upw;
 352
 353                /* handle the case where all the fileservers have rejected the
 354                 * volume
 355                 * - TODO: try asking the fileservers for volume information
 356                 * - TODO: contact the VL server again to see if the volume is
 357                 *         no longer registered
 358                 */
 359                up_write(&volume->server_sem);
 360                afs_put_server(server);
 361                _leave(" [completely rejected]");
 362                return 1;
 363
 364                /* problem reaching the server */
 365        case -ENETUNREACH:
 366        case -EHOSTUNREACH:
 367        case -ECONNREFUSED:
 368        case -ETIME:
 369        case -ETIMEDOUT:
 370        case -EREMOTEIO:
 371                /* mark the server as dead
 372                 * TODO: vary dead timeout depending on error
 373                 */
 374                spin_lock(&server->fs_lock);
 375                if (!server->fs_state) {
 376                        server->fs_dead_jif = jiffies + HZ * 10;
 377                        server->fs_state = result;
 378                        printk("kAFS: SERVER DEAD state=%d\n", result);
 379                }
 380                spin_unlock(&server->fs_lock);
 381                goto try_next_server;
 382
 383                /* miscellaneous error */
 384        default:
 385                server->fs_act_jif = jiffies;
 386        case -ENOMEM:
 387        case -ENONET:
 388                /* tell the caller to accept the result */
 389                afs_put_server(server);
 390                _leave(" [local failure]");
 391                return 1;
 392        }
 393
 394        /* tell the caller to loop around and try the next server */
 395try_next_server_upw:
 396        up_write(&volume->server_sem);
 397try_next_server:
 398        afs_put_server(server);
 399        _leave(" [try next server]");
 400        return 0;
 401}
 402