linux/fs/nfs/nfs4state.c
<<
>>
Prefs
   1/*
   2 *  fs/nfs/nfs4state.c
   3 *
   4 *  Client-side XDR for NFSv4.
   5 *
   6 *  Copyright (c) 2002 The Regents of the University of Michigan.
   7 *  All rights reserved.
   8 *
   9 *  Kendrick Smith <kmsmith@umich.edu>
  10 *
  11 *  Redistribution and use in source and binary forms, with or without
  12 *  modification, are permitted provided that the following conditions
  13 *  are met:
  14 *
  15 *  1. Redistributions of source code must retain the above copyright
  16 *     notice, this list of conditions and the following disclaimer.
  17 *  2. Redistributions in binary form must reproduce the above copyright
  18 *     notice, this list of conditions and the following disclaimer in the
  19 *     documentation and/or other materials provided with the distribution.
  20 *  3. Neither the name of the University nor the names of its
  21 *     contributors may be used to endorse or promote products derived
  22 *     from this software without specific prior written permission.
  23 *
  24 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  25 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  26 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  27 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  31 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  32 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  33 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  34 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35 *
  36 * Implementation of the NFSv4 state model.  For the time being,
  37 * this is minimal, but will be made much more complex in a
  38 * subsequent patch.
  39 */
  40
  41#include <linux/kernel.h>
  42#include <linux/slab.h>
  43#include <linux/fs.h>
  44#include <linux/nfs_fs.h>
  45#include <linux/kthread.h>
  46#include <linux/module.h>
  47#include <linux/random.h>
  48#include <linux/ratelimit.h>
  49#include <linux/workqueue.h>
  50#include <linux/bitops.h>
  51#include <linux/jiffies.h>
  52
  53#include <linux/sunrpc/clnt.h>
  54
  55#include "nfs4_fs.h"
  56#include "callback.h"
  57#include "delegation.h"
  58#include "internal.h"
  59#include "nfs4idmap.h"
  60#include "nfs4session.h"
  61#include "pnfs.h"
  62#include "netns.h"
  63
/* NOTE(review): presumably selects the debug class for this file's dprintk() output — confirm. */
#define NFSDBG_FACILITY         NFSDBG_STATE

/* NOTE(review): not referenced in the part of the file reviewed here. */
#define OPENOWNER_POOL_SIZE     8

/* File-scope object with no initializer, so every byte is zero. */
const nfs4_stateid zero_stateid;
/* NOTE(review): presumably serializes client ID establishment — confirm at its users. */
static DEFINE_MUTEX(nfs_clid_init_mutex);
  70
  71int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
  72{
  73        struct nfs4_setclientid_res clid = {
  74                .clientid = clp->cl_clientid,
  75                .confirm = clp->cl_confirm,
  76        };
  77        unsigned short port;
  78        int status;
  79        struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
  80
  81        if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
  82                goto do_confirm;
  83        port = nn->nfs_callback_tcpport;
  84        if (clp->cl_addr.ss_family == AF_INET6)
  85                port = nn->nfs_callback_tcpport6;
  86
  87        status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
  88        if (status != 0)
  89                goto out;
  90        clp->cl_clientid = clid.clientid;
  91        clp->cl_confirm = clid.confirm;
  92        set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
  93do_confirm:
  94        status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
  95        if (status != 0)
  96                goto out;
  97        clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
  98        nfs4_schedule_state_renewal(clp);
  99out:
 100        return status;
 101}
 102
 103/**
 104 * nfs40_discover_server_trunking - Detect server IP address trunking (mv0)
 105 *
 106 * @clp: nfs_client under test
 107 * @result: OUT: found nfs_client, or clp
 108 * @cred: credential to use for trunking test
 109 *
 110 * Returns zero, a negative errno, or a negative NFS4ERR status.
 111 * If zero is returned, an nfs_client pointer is planted in
 112 * "result".
 113 *
 114 * Note: The returned client may not yet be marked ready.
 115 */
 116int nfs40_discover_server_trunking(struct nfs_client *clp,
 117                                   struct nfs_client **result,
 118                                   struct rpc_cred *cred)
 119{
 120        struct nfs4_setclientid_res clid = {
 121                .clientid = clp->cl_clientid,
 122                .confirm = clp->cl_confirm,
 123        };
 124        struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 125        unsigned short port;
 126        int status;
 127
 128        port = nn->nfs_callback_tcpport;
 129        if (clp->cl_addr.ss_family == AF_INET6)
 130                port = nn->nfs_callback_tcpport6;
 131
 132        status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
 133        if (status != 0)
 134                goto out;
 135        clp->cl_clientid = clid.clientid;
 136        clp->cl_confirm = clid.confirm;
 137
 138        status = nfs40_walk_client_list(clp, result, cred);
 139        if (status == 0) {
 140                /* Sustain the lease, even if it's empty.  If the clientid4
 141                 * goes stale it's of no use for trunking discovery. */
 142                nfs4_schedule_state_renewal(*result);
 143        }
 144out:
 145        return status;
 146}
 147
 148struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
 149{
 150        struct rpc_cred *cred = NULL;
 151
 152        if (clp->cl_machine_cred != NULL)
 153                cred = get_rpccred(clp->cl_machine_cred);
 154        return cred;
 155}
 156
 157static void nfs4_root_machine_cred(struct nfs_client *clp)
 158{
 159        struct rpc_cred *cred, *new;
 160
 161        new = rpc_lookup_machine_cred(NULL);
 162        spin_lock(&clp->cl_lock);
 163        cred = clp->cl_machine_cred;
 164        clp->cl_machine_cred = new;
 165        spin_unlock(&clp->cl_lock);
 166        if (cred != NULL)
 167                put_rpccred(cred);
 168}
 169
 170static struct rpc_cred *
 171nfs4_get_renew_cred_server_locked(struct nfs_server *server)
 172{
 173        struct rpc_cred *cred = NULL;
 174        struct nfs4_state_owner *sp;
 175        struct rb_node *pos;
 176
 177        for (pos = rb_first(&server->state_owners);
 178             pos != NULL;
 179             pos = rb_next(pos)) {
 180                sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 181                if (list_empty(&sp->so_states))
 182                        continue;
 183                cred = get_rpccred(sp->so_cred);
 184                break;
 185        }
 186        return cred;
 187}
 188
 189/**
 190 * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
 191 * @clp: client state handle
 192 *
 193 * Returns an rpc_cred with reference count bumped, or NULL.
 194 * Caller must hold clp->cl_lock.
 195 */
 196struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
 197{
 198        struct rpc_cred *cred = NULL;
 199        struct nfs_server *server;
 200
 201        /* Use machine credentials if available */
 202        cred = nfs4_get_machine_cred_locked(clp);
 203        if (cred != NULL)
 204                goto out;
 205
 206        rcu_read_lock();
 207        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 208                cred = nfs4_get_renew_cred_server_locked(server);
 209                if (cred != NULL)
 210                        break;
 211        }
 212        rcu_read_unlock();
 213
 214out:
 215        return cred;
 216}
 217
 218static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
 219{
 220        if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
 221                spin_lock(&tbl->slot_tbl_lock);
 222                nfs41_wake_slot_table(tbl);
 223                spin_unlock(&tbl->slot_tbl_lock);
 224        }
 225}
 226
 227static void nfs4_end_drain_session(struct nfs_client *clp)
 228{
 229        struct nfs4_session *ses = clp->cl_session;
 230
 231        if (clp->cl_slot_tbl) {
 232                nfs4_end_drain_slot_table(clp->cl_slot_tbl);
 233                return;
 234        }
 235
 236        if (ses != NULL) {
 237                nfs4_end_drain_slot_table(&ses->bc_slot_table);
 238                nfs4_end_drain_slot_table(&ses->fc_slot_table);
 239        }
 240}
 241
 242static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
 243{
 244        set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
 245        spin_lock(&tbl->slot_tbl_lock);
 246        if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
 247                reinit_completion(&tbl->complete);
 248                spin_unlock(&tbl->slot_tbl_lock);
 249                return wait_for_completion_interruptible(&tbl->complete);
 250        }
 251        spin_unlock(&tbl->slot_tbl_lock);
 252        return 0;
 253}
 254
 255static int nfs4_begin_drain_session(struct nfs_client *clp)
 256{
 257        struct nfs4_session *ses = clp->cl_session;
 258        int ret = 0;
 259
 260        if (clp->cl_slot_tbl)
 261                return nfs4_drain_slot_tbl(clp->cl_slot_tbl);
 262
 263        /* back channel */
 264        ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
 265        if (ret)
 266                return ret;
 267        /* fore channel */
 268        return nfs4_drain_slot_tbl(&ses->fc_slot_table);
 269}
 270
 271#if defined(CONFIG_NFS_V4_1)
 272
 273static int nfs41_setup_state_renewal(struct nfs_client *clp)
 274{
 275        int status;
 276        struct nfs_fsinfo fsinfo;
 277
 278        if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
 279                nfs4_schedule_state_renewal(clp);
 280                return 0;
 281        }
 282
 283        status = nfs4_proc_get_lease_time(clp, &fsinfo);
 284        if (status == 0) {
 285                /* Update lease time and schedule renewal */
 286                spin_lock(&clp->cl_lock);
 287                clp->cl_lease_time = fsinfo.lease_time * HZ;
 288                clp->cl_last_renewal = jiffies;
 289                spin_unlock(&clp->cl_lock);
 290
 291                nfs4_schedule_state_renewal(clp);
 292        }
 293
 294        return status;
 295}
 296
/*
 * Clear the lease-confirm, session-reset and bind-conn-to-session bits in
 * @clp's state, then set up lease renewal.  The status returned by
 * nfs41_setup_state_renewal() is not checked here.
 */
static void nfs41_finish_session_reset(struct nfs_client *clp)
{
        clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
        clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
        /* create_session negotiated new slot table */
        clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
        nfs41_setup_state_renewal(clp);
}
 305
 306int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 307{
 308        int status;
 309
 310        if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
 311                goto do_confirm;
 312        status = nfs4_proc_exchange_id(clp, cred);
 313        if (status != 0)
 314                goto out;
 315        set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 316do_confirm:
 317        status = nfs4_proc_create_session(clp, cred);
 318        if (status != 0)
 319                goto out;
 320        nfs41_finish_session_reset(clp);
 321        nfs_mark_client_ready(clp, NFS_CS_READY);
 322out:
 323        return status;
 324}
 325
 326/**
 327 * nfs41_discover_server_trunking - Detect server IP address trunking (mv1)
 328 *
 329 * @clp: nfs_client under test
 330 * @result: OUT: found nfs_client, or clp
 331 * @cred: credential to use for trunking test
 332 *
 333 * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status.
 334 * If NFS4_OK is returned, an nfs_client pointer is planted in
 335 * "result".
 336 *
 337 * Note: The returned client may not yet be marked ready.
 338 */
 339int nfs41_discover_server_trunking(struct nfs_client *clp,
 340                                   struct nfs_client **result,
 341                                   struct rpc_cred *cred)
 342{
 343        int status;
 344
 345        status = nfs4_proc_exchange_id(clp, cred);
 346        if (status != NFS4_OK)
 347                return status;
 348
 349        status = nfs41_walk_client_list(clp, result, cred);
 350        if (status < 0)
 351                return status;
 352        if (clp != *result)
 353                return 0;
 354
 355        /* Purge state if the client id was established in a prior instance */
 356        if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
 357                set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
 358        else
 359                set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 360        nfs4_schedule_state_manager(clp);
 361        status = nfs_wait_client_init_complete(clp);
 362        if (status < 0)
 363                nfs_put_client(clp);
 364        return status;
 365}
 366
 367#endif /* CONFIG_NFS_V4_1 */
 368
/**
 * nfs4_get_clid_cred - Acquire credential for a setclientid operation
 * @clp: client state handle
 *
 * Looks up the machine credential while holding clp->cl_lock.
 *
 * Returns an rpc_cred with reference count bumped, or NULL if the
 * client has no machine credential.
 */
struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp)
{
        struct rpc_cred *cred;

        spin_lock(&clp->cl_lock);
        cred = nfs4_get_machine_cred_locked(clp);
        spin_unlock(&clp->cl_lock);
        return cred;
}
 384
 385static struct nfs4_state_owner *
 386nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 387{
 388        struct rb_node **p = &server->state_owners.rb_node,
 389                       *parent = NULL;
 390        struct nfs4_state_owner *sp;
 391
 392        while (*p != NULL) {
 393                parent = *p;
 394                sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 395
 396                if (cred < sp->so_cred)
 397                        p = &parent->rb_left;
 398                else if (cred > sp->so_cred)
 399                        p = &parent->rb_right;
 400                else {
 401                        if (!list_empty(&sp->so_lru))
 402                                list_del_init(&sp->so_lru);
 403                        atomic_inc(&sp->so_count);
 404                        return sp;
 405                }
 406        }
 407        return NULL;
 408}
 409
 410static struct nfs4_state_owner *
 411nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 412{
 413        struct nfs_server *server = new->so_server;
 414        struct rb_node **p = &server->state_owners.rb_node,
 415                       *parent = NULL;
 416        struct nfs4_state_owner *sp;
 417        int err;
 418
 419        while (*p != NULL) {
 420                parent = *p;
 421                sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 422
 423                if (new->so_cred < sp->so_cred)
 424                        p = &parent->rb_left;
 425                else if (new->so_cred > sp->so_cred)
 426                        p = &parent->rb_right;
 427                else {
 428                        if (!list_empty(&sp->so_lru))
 429                                list_del_init(&sp->so_lru);
 430                        atomic_inc(&sp->so_count);
 431                        return sp;
 432                }
 433        }
 434        err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
 435        if (err)
 436                return ERR_PTR(err);
 437        rb_link_node(&new->so_server_node, parent, p);
 438        rb_insert_color(&new->so_server_node, &server->state_owners);
 439        return new;
 440}
 441
 442static void
 443nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
 444{
 445        struct nfs_server *server = sp->so_server;
 446
 447        if (!RB_EMPTY_NODE(&sp->so_server_node))
 448                rb_erase(&sp->so_server_node, &server->state_owners);
 449        ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
 450}
 451
/*
 * Initialise a sequence ID counter: stamp its creation time, zero the
 * flags and counter, and set up its lock, list head and RPC wait queue.
 * The owner_id field is not touched here.
 */
static void
nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
{
        sc->create_time = ktime_get();
        sc->flags = 0;
        sc->counter = 0;
        spin_lock_init(&sc->lock);
        INIT_LIST_HEAD(&sc->list);
        rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
}
 462
/*
 * Tear down the RPC wait queue set up by nfs4_init_seqid_counter().
 */
static void
nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
{
        rpc_destroy_wait_queue(&sc->wait);
}
 468
 469/*
 470 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
 471 * create a new state_owner.
 472 *
 473 */
 474static struct nfs4_state_owner *
 475nfs4_alloc_state_owner(struct nfs_server *server,
 476                struct rpc_cred *cred,
 477                gfp_t gfp_flags)
 478{
 479        struct nfs4_state_owner *sp;
 480
 481        sp = kzalloc(sizeof(*sp), gfp_flags);
 482        if (!sp)
 483                return NULL;
 484        sp->so_server = server;
 485        sp->so_cred = get_rpccred(cred);
 486        spin_lock_init(&sp->so_lock);
 487        INIT_LIST_HEAD(&sp->so_states);
 488        nfs4_init_seqid_counter(&sp->so_seqid);
 489        atomic_set(&sp->so_count, 1);
 490        INIT_LIST_HEAD(&sp->so_lru);
 491        seqcount_init(&sp->so_reclaim_seqcount);
 492        mutex_init(&sp->so_delegreturn_mutex);
 493        return sp;
 494}
 495
 496static void
 497nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 498{
 499        struct rb_node *rb_node = &sp->so_server_node;
 500
 501        if (!RB_EMPTY_NODE(rb_node)) {
 502                struct nfs_server *server = sp->so_server;
 503                struct nfs_client *clp = server->nfs_client;
 504
 505                spin_lock(&clp->cl_lock);
 506                if (!RB_EMPTY_NODE(rb_node)) {
 507                        rb_erase(rb_node, &server->state_owners);
 508                        RB_CLEAR_NODE(rb_node);
 509                }
 510                spin_unlock(&clp->cl_lock);
 511        }
 512}
 513
/*
 * Release everything held by @sp and free it: destroy the seqid
 * counter's wait queue, drop the credential reference taken in
 * nfs4_alloc_state_owner(), then free the structure itself.
 * Does not touch the server's owner tree or owner ID allocator.
 */
static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
        nfs4_destroy_seqid_counter(&sp->so_seqid);
        put_rpccred(sp->so_cred);
        kfree(sp);
}
 520
 521static void nfs4_gc_state_owners(struct nfs_server *server)
 522{
 523        struct nfs_client *clp = server->nfs_client;
 524        struct nfs4_state_owner *sp, *tmp;
 525        unsigned long time_min, time_max;
 526        LIST_HEAD(doomed);
 527
 528        spin_lock(&clp->cl_lock);
 529        time_max = jiffies;
 530        time_min = (long)time_max - (long)clp->cl_lease_time;
 531        list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
 532                /* NB: LRU is sorted so that oldest is at the head */
 533                if (time_in_range(sp->so_expires, time_min, time_max))
 534                        break;
 535                list_move(&sp->so_lru, &doomed);
 536                nfs4_remove_state_owner_locked(sp);
 537        }
 538        spin_unlock(&clp->cl_lock);
 539
 540        list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
 541                list_del(&sp->so_lru);
 542                nfs4_free_state_owner(sp);
 543        }
 544}
 545
 546/**
 547 * nfs4_get_state_owner - Look up a state owner given a credential
 548 * @server: nfs_server to search
 549 * @cred: RPC credential to match
 550 *
 551 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
 552 */
 553struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 554                                              struct rpc_cred *cred,
 555                                              gfp_t gfp_flags)
 556{
 557        struct nfs_client *clp = server->nfs_client;
 558        struct nfs4_state_owner *sp, *new;
 559
 560        spin_lock(&clp->cl_lock);
 561        sp = nfs4_find_state_owner_locked(server, cred);
 562        spin_unlock(&clp->cl_lock);
 563        if (sp != NULL)
 564                goto out;
 565        new = nfs4_alloc_state_owner(server, cred, gfp_flags);
 566        if (new == NULL)
 567                goto out;
 568        do {
 569                if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
 570                        break;
 571                spin_lock(&clp->cl_lock);
 572                sp = nfs4_insert_state_owner_locked(new);
 573                spin_unlock(&clp->cl_lock);
 574        } while (sp == ERR_PTR(-EAGAIN));
 575        if (sp != new)
 576                nfs4_free_state_owner(new);
 577out:
 578        nfs4_gc_state_owners(server);
 579        return sp;
 580}
 581
 582/**
 583 * nfs4_put_state_owner - Release a nfs4_state_owner
 584 * @sp: state owner data to release
 585 *
 586 * Note that we keep released state owners on an LRU
 587 * list.
 588 * This caches valid state owners so that they can be
 589 * reused, to avoid the OPEN_CONFIRM on minor version 0.
 590 * It also pins the uniquifier of dropped state owners for
 591 * a while, to ensure that those state owner names are
 592 * never reused.
 593 */
 594void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 595{
 596        struct nfs_server *server = sp->so_server;
 597        struct nfs_client *clp = server->nfs_client;
 598
 599        if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 600                return;
 601
 602        sp->so_expires = jiffies;
 603        list_add_tail(&sp->so_lru, &server->state_owners_lru);
 604        spin_unlock(&clp->cl_lock);
 605}
 606
 607/**
 608 * nfs4_purge_state_owners - Release all cached state owners
 609 * @server: nfs_server with cached state owners to release
 610 *
 611 * Called at umount time.  Remaining state owners will be on
 612 * the LRU with ref count of zero.
 613 */
 614void nfs4_purge_state_owners(struct nfs_server *server)
 615{
 616        struct nfs_client *clp = server->nfs_client;
 617        struct nfs4_state_owner *sp, *tmp;
 618        LIST_HEAD(doomed);
 619
 620        spin_lock(&clp->cl_lock);
 621        list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
 622                list_move(&sp->so_lru, &doomed);
 623                nfs4_remove_state_owner_locked(sp);
 624        }
 625        spin_unlock(&clp->cl_lock);
 626
 627        list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
 628                list_del(&sp->so_lru);
 629                nfs4_free_state_owner(sp);
 630        }
 631}
 632
 633static struct nfs4_state *
 634nfs4_alloc_open_state(void)
 635{
 636        struct nfs4_state *state;
 637
 638        state = kzalloc(sizeof(*state), GFP_NOFS);
 639        if (!state)
 640                return NULL;
 641        atomic_set(&state->count, 1);
 642        INIT_LIST_HEAD(&state->lock_states);
 643        spin_lock_init(&state->state_lock);
 644        seqlock_init(&state->seqlock);
 645        return state;
 646}
 647
 648void
 649nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
 650{
 651        if (state->state == fmode)
 652                return;
 653        /* NB! List reordering - see the reclaim code for why.  */
 654        if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
 655                if (fmode & FMODE_WRITE)
 656                        list_move(&state->open_states, &state->owner->so_states);
 657                else
 658                        list_move_tail(&state->open_states, &state->owner->so_states);
 659        }
 660        state->state = fmode;
 661}
 662
 663static struct nfs4_state *
 664__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 665{
 666        struct nfs_inode *nfsi = NFS_I(inode);
 667        struct nfs4_state *state;
 668
 669        list_for_each_entry(state, &nfsi->open_states, inode_states) {
 670                if (state->owner != owner)
 671                        continue;
 672                if (!nfs4_valid_open_stateid(state))
 673                        continue;
 674                if (atomic_inc_not_zero(&state->count))
 675                        return state;
 676        }
 677        return NULL;
 678}
 679
/*
 * Free the memory of an open state.  Nothing else is released here.
 */
static void
nfs4_free_open_state(struct nfs4_state *state)
{
        kfree(state);
}
 685
 686struct nfs4_state *
 687nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
 688{
 689        struct nfs4_state *state, *new;
 690        struct nfs_inode *nfsi = NFS_I(inode);
 691
 692        spin_lock(&inode->i_lock);
 693        state = __nfs4_find_state_byowner(inode, owner);
 694        spin_unlock(&inode->i_lock);
 695        if (state)
 696                goto out;
 697        new = nfs4_alloc_open_state();
 698        spin_lock(&owner->so_lock);
 699        spin_lock(&inode->i_lock);
 700        state = __nfs4_find_state_byowner(inode, owner);
 701        if (state == NULL && new != NULL) {
 702                state = new;
 703                state->owner = owner;
 704                atomic_inc(&owner->so_count);
 705                list_add(&state->inode_states, &nfsi->open_states);
 706                ihold(inode);
 707                state->inode = inode;
 708                spin_unlock(&inode->i_lock);
 709                /* Note: The reclaim code dictates that we add stateless
 710                 * and read-only stateids to the end of the list */
 711                list_add_tail(&state->open_states, &owner->so_states);
 712                spin_unlock(&owner->so_lock);
 713        } else {
 714                spin_unlock(&inode->i_lock);
 715                spin_unlock(&owner->so_lock);
 716                if (new)
 717                        nfs4_free_open_state(new);
 718        }
 719out:
 720        return state;
 721}
 722
 723void nfs4_put_open_state(struct nfs4_state *state)
 724{
 725        struct inode *inode = state->inode;
 726        struct nfs4_state_owner *owner = state->owner;
 727
 728        if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
 729                return;
 730        spin_lock(&inode->i_lock);
 731        list_del(&state->inode_states);
 732        list_del(&state->open_states);
 733        spin_unlock(&inode->i_lock);
 734        spin_unlock(&owner->so_lock);
 735        iput(inode);
 736        nfs4_free_open_state(state);
 737        nfs4_put_state_owner(owner);
 738}
 739
 740/*
 741 * Close the current file.
 742 */
 743static void __nfs4_close(struct nfs4_state *state,
 744                fmode_t fmode, gfp_t gfp_mask, int wait)
 745{
 746        struct nfs4_state_owner *owner = state->owner;
 747        int call_close = 0;
 748        fmode_t newstate;
 749
 750        atomic_inc(&owner->so_count);
 751        /* Protect against nfs4_find_state() */
 752        spin_lock(&owner->so_lock);
 753        switch (fmode & (FMODE_READ | FMODE_WRITE)) {
 754                case FMODE_READ:
 755                        state->n_rdonly--;
 756                        break;
 757                case FMODE_WRITE:
 758                        state->n_wronly--;
 759                        break;
 760                case FMODE_READ|FMODE_WRITE:
 761                        state->n_rdwr--;
 762        }
 763        newstate = FMODE_READ|FMODE_WRITE;
 764        if (state->n_rdwr == 0) {
 765                if (state->n_rdonly == 0) {
 766                        newstate &= ~FMODE_READ;
 767                        call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
 768                        call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
 769                }
 770                if (state->n_wronly == 0) {
 771                        newstate &= ~FMODE_WRITE;
 772                        call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
 773                        call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
 774                }
 775                if (newstate == 0)
 776                        clear_bit(NFS_DELEGATED_STATE, &state->flags);
 777        }
 778        nfs4_state_set_mode_locked(state, newstate);
 779        spin_unlock(&owner->so_lock);
 780
 781        if (!call_close) {
 782                nfs4_put_open_state(state);
 783                nfs4_put_state_owner(owner);
 784        } else
 785                nfs4_do_close(state, gfp_mask, wait);
 786}
 787
/*
 * Close @state for access mode @fmode, passing GFP_NOFS and wait == 0
 * to __nfs4_close().
 */
void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
{
        __nfs4_close(state, fmode, GFP_NOFS, 0);
}
 792
/*
 * Close @state for access mode @fmode, passing GFP_KERNEL and wait == 1
 * to __nfs4_close().
 */
void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
{
        __nfs4_close(state, fmode, GFP_KERNEL, 1);
}
 797
 798/*
 799 * Search the state->lock_states for an existing lock_owner
 800 * that is compatible with current->files
 801 */
 802static struct nfs4_lock_state *
 803__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 804{
 805        struct nfs4_lock_state *pos;
 806        list_for_each_entry(pos, &state->lock_states, ls_locks) {
 807                if (pos->ls_owner != fl_owner)
 808                        continue;
 809                atomic_inc(&pos->ls_count);
 810                return pos;
 811        }
 812        return NULL;
 813}
 814
 815/*
 816 * Return a compatible lock_state. If no initialized lock_state structure
 817 * exists, return an uninitialized one.
 818 *
 819 */
 820static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 821{
 822        struct nfs4_lock_state *lsp;
 823        struct nfs_server *server = state->owner->so_server;
 824
 825        lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
 826        if (lsp == NULL)
 827                return NULL;
 828        nfs4_init_seqid_counter(&lsp->ls_seqid);
 829        atomic_set(&lsp->ls_count, 1);
 830        lsp->ls_state = state;
 831        lsp->ls_owner = fl_owner;
 832        lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
 833        if (lsp->ls_seqid.owner_id < 0)
 834                goto out_free;
 835        INIT_LIST_HEAD(&lsp->ls_locks);
 836        return lsp;
 837out_free:
 838        kfree(lsp);
 839        return NULL;
 840}
 841
/*
 * Release a lock state: return its owner ID to @server's lockowner_id
 * allocator, destroy its seqid counter's wait queue, and free it.
 */
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
        ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
        nfs4_destroy_seqid_counter(&lsp->ls_seqid);
        kfree(lsp);
}
 848
 849/*
 850 * Return a compatible lock_state. If no initialized lock_state structure
 851 * exists, return an uninitialized one.
 852 *
 853 */
 854static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 855{
 856        struct nfs4_lock_state *lsp, *new = NULL;
 857        
 858        for(;;) {
 859                spin_lock(&state->state_lock);
 860                lsp = __nfs4_find_lock_state(state, owner);
 861                if (lsp != NULL)
 862                        break;
 863                if (new != NULL) {
 864                        list_add(&new->ls_locks, &state->lock_states);
 865                        set_bit(LK_STATE_IN_USE, &state->flags);
 866                        lsp = new;
 867                        new = NULL;
 868                        break;
 869                }
 870                spin_unlock(&state->state_lock);
 871                new = nfs4_alloc_lock_state(state, owner);
 872                if (new == NULL)
 873                        return NULL;
 874        }
 875        spin_unlock(&state->state_lock);
 876        if (new != NULL)
 877                nfs4_free_lock_state(state->owner->so_server, new);
 878        return lsp;
 879}
 880
 881/*
 882 * Release reference to lock_state, and free it if we see that
 883 * it is no longer in use
 884 */
 885void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 886{
 887        struct nfs_server *server;
 888        struct nfs4_state *state;
 889
 890        if (lsp == NULL)
 891                return;
 892        state = lsp->ls_state;
 893        if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
 894                return;
 895        list_del(&lsp->ls_locks);
 896        if (list_empty(&state->lock_states))
 897                clear_bit(LK_STATE_IN_USE, &state->flags);
 898        spin_unlock(&state->state_lock);
 899        server = state->owner->so_server;
 900        if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
 901                struct nfs_client *clp = server->nfs_client;
 902
 903                clp->cl_mvops->free_lock_state(server, lsp);
 904        } else
 905                nfs4_free_lock_state(server, lsp);
 906}
 907
 908static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 909{
 910        struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
 911
 912        dst->fl_u.nfs4_fl.owner = lsp;
 913        atomic_inc(&lsp->ls_count);
 914}
 915
 916static void nfs4_fl_release_lock(struct file_lock *fl)
 917{
 918        nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
 919}
 920
 921static const struct file_lock_operations nfs4_fl_lock_ops = {
 922        .fl_copy_lock = nfs4_fl_copy_lock,
 923        .fl_release_private = nfs4_fl_release_lock,
 924};
 925
 926int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 927{
 928        struct nfs4_lock_state *lsp;
 929
 930        if (fl->fl_ops != NULL)
 931                return 0;
 932        lsp = nfs4_get_lock_state(state, fl->fl_owner);
 933        if (lsp == NULL)
 934                return -ENOMEM;
 935        fl->fl_u.nfs4_fl.owner = lsp;
 936        fl->fl_ops = &nfs4_fl_lock_ops;
 937        return 0;
 938}
 939
 940static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
 941                struct nfs4_state *state,
 942                const struct nfs_lockowner *lockowner)
 943{
 944        struct nfs4_lock_state *lsp;
 945        fl_owner_t fl_owner;
 946        int ret = -ENOENT;
 947
 948
 949        if (lockowner == NULL)
 950                goto out;
 951
 952        if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
 953                goto out;
 954
 955        fl_owner = lockowner->l_owner;
 956        spin_lock(&state->state_lock);
 957        lsp = __nfs4_find_lock_state(state, fl_owner);
 958        if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
 959                ret = -EIO;
 960        else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
 961                nfs4_stateid_copy(dst, &lsp->ls_stateid);
 962                ret = 0;
 963        }
 964        spin_unlock(&state->state_lock);
 965        nfs4_put_lock_state(lsp);
 966out:
 967        return ret;
 968}
 969
 970static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
 971{
 972        const nfs4_stateid *src;
 973        int seq;
 974
 975        do {
 976                src = &zero_stateid;
 977                seq = read_seqbegin(&state->seqlock);
 978                if (test_bit(NFS_OPEN_STATE, &state->flags))
 979                        src = &state->open_stateid;
 980                nfs4_stateid_copy(dst, src);
 981        } while (read_seqretry(&state->seqlock, seq));
 982}
 983
 984/*
 985 * Byte-range lock aware utility to initialize the stateid of read/write
 986 * requests.
 987 */
 988int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
 989                fmode_t fmode, const struct nfs_lockowner *lockowner)
 990{
 991        int ret = nfs4_copy_lock_stateid(dst, state, lockowner);
 992        if (ret == -EIO)
 993                /* A lost lock - don't even consider delegations */
 994                goto out;
 995        /* returns true if delegation stateid found and copied */
 996        if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) {
 997                ret = 0;
 998                goto out;
 999        }
1000        if (ret != -ENOENT)
1001                /* nfs4_copy_delegation_stateid() didn't over-write
1002                 * dst, so it still has the lock stateid which we now
1003                 * choose to use.
1004                 */
1005                goto out;
1006        nfs4_copy_open_stateid(dst, state);
1007        ret = 0;
1008out:
1009        if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
1010                dst->seqid = 0;
1011        return ret;
1012}
1013
1014struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
1015{
1016        struct nfs_seqid *new;
1017
1018        new = kmalloc(sizeof(*new), gfp_mask);
1019        if (new == NULL)
1020                return ERR_PTR(-ENOMEM);
1021        new->sequence = counter;
1022        INIT_LIST_HEAD(&new->list);
1023        new->task = NULL;
1024        return new;
1025}
1026
1027void nfs_release_seqid(struct nfs_seqid *seqid)
1028{
1029        struct nfs_seqid_counter *sequence;
1030
1031        if (seqid == NULL || list_empty(&seqid->list))
1032                return;
1033        sequence = seqid->sequence;
1034        spin_lock(&sequence->lock);
1035        list_del_init(&seqid->list);
1036        if (!list_empty(&sequence->list)) {
1037                struct nfs_seqid *next;
1038
1039                next = list_first_entry(&sequence->list,
1040                                struct nfs_seqid, list);
1041                rpc_wake_up_queued_task(&sequence->wait, next->task);
1042        }
1043        spin_unlock(&sequence->lock);
1044}
1045
/* Dequeue @seqid (a no-op when NULL or not queued) and free it */
void nfs_free_seqid(struct nfs_seqid *seqid)
{
	nfs_release_seqid(seqid);
	kfree(seqid);
}
1051
1052/*
1053 * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
1054 * failed with a seqid incrementing error -
1055 * see comments nfs4.h:seqid_mutating_error()
1056 */
1057static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
1058{
1059        switch (status) {
1060                case 0:
1061                        break;
1062                case -NFS4ERR_BAD_SEQID:
1063                        if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
1064                                return;
1065                        pr_warn_ratelimited("NFS: v4 server returned a bad"
1066                                        " sequence-id error on an"
1067                                        " unconfirmed sequence %p!\n",
1068                                        seqid->sequence);
1069                case -NFS4ERR_STALE_CLIENTID:
1070                case -NFS4ERR_STALE_STATEID:
1071                case -NFS4ERR_BAD_STATEID:
1072                case -NFS4ERR_BADXDR:
1073                case -NFS4ERR_RESOURCE:
1074                case -NFS4ERR_NOFILEHANDLE:
1075                        /* Non-seqid mutating errors */
1076                        return;
1077        };
1078        /*
1079         * Note: no locking needed as we are guaranteed to be first
1080         * on the sequence list
1081         */
1082        seqid->sequence->counter++;
1083}
1084
1085void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
1086{
1087        struct nfs4_state_owner *sp;
1088
1089        if (seqid == NULL)
1090                return;
1091
1092        sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid);
1093        if (status == -NFS4ERR_BAD_SEQID)
1094                nfs4_drop_state_owner(sp);
1095        if (!nfs4_has_session(sp->so_server->nfs_client))
1096                nfs_increment_seqid(status, seqid);
1097}
1098
1099/*
1100 * Increment the seqid if the LOCK/LOCKU succeeded, or
1101 * failed with a seqid incrementing error -
1102 * see comments nfs4.h:seqid_mutating_error()
1103 */
1104void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
1105{
1106        if (seqid != NULL)
1107                nfs_increment_seqid(status, seqid);
1108}
1109
1110int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
1111{
1112        struct nfs_seqid_counter *sequence;
1113        int status = 0;
1114
1115        if (seqid == NULL)
1116                goto out;
1117        sequence = seqid->sequence;
1118        spin_lock(&sequence->lock);
1119        seqid->task = task;
1120        if (list_empty(&seqid->list))
1121                list_add_tail(&seqid->list, &sequence->list);
1122        if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
1123                goto unlock;
1124        rpc_sleep_on(&sequence->wait, task, NULL);
1125        status = -EAGAIN;
1126unlock:
1127        spin_unlock(&sequence->lock);
1128out:
1129        return status;
1130}
1131
1132static int nfs4_run_state_manager(void *);
1133
1134static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
1135{
1136        smp_mb__before_atomic();
1137        clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
1138        smp_mb__after_atomic();
1139        wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
1140        rpc_wake_up(&clp->cl_rpcwaitq);
1141}
1142
1143/*
1144 * Schedule the nfs_client asynchronous state management routine
1145 */
1146void nfs4_schedule_state_manager(struct nfs_client *clp)
1147{
1148        struct task_struct *task;
1149        char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
1150
1151        if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1152                return;
1153        __module_get(THIS_MODULE);
1154        atomic_inc(&clp->cl_count);
1155
1156        /* The rcu_read_lock() is not strictly necessary, as the state
1157         * manager is the only thread that ever changes the rpc_xprt
1158         * after it's initialized.  At this point, we're single threaded. */
1159        rcu_read_lock();
1160        snprintf(buf, sizeof(buf), "%s-manager",
1161                        rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
1162        rcu_read_unlock();
1163        task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
1164        if (IS_ERR(task)) {
1165                printk(KERN_ERR "%s: kthread_run: %ld\n",
1166                        __func__, PTR_ERR(task));
1167                nfs4_clear_state_manager_bit(clp);
1168                nfs_put_client(clp);
1169                module_put(THIS_MODULE);
1170        }
1171}
1172
1173/*
1174 * Schedule a lease recovery attempt
1175 */
1176void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1177{
1178        if (!clp)
1179                return;
1180        if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1181                set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1182        dprintk("%s: scheduling lease recovery for server %s\n", __func__,
1183                        clp->cl_hostname);
1184        nfs4_schedule_state_manager(clp);
1185}
1186EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
1187
1188/**
1189 * nfs4_schedule_migration_recovery - trigger migration recovery
1190 *
1191 * @server: FSID that is migrating
1192 *
1193 * Returns zero if recovery has started, otherwise a negative NFS4ERR
1194 * value is returned.
1195 */
1196int nfs4_schedule_migration_recovery(const struct nfs_server *server)
1197{
1198        struct nfs_client *clp = server->nfs_client;
1199
1200        if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
1201                pr_err("NFS: volatile file handles not supported (server %s)\n",
1202                                clp->cl_hostname);
1203                return -NFS4ERR_IO;
1204        }
1205
1206        if (test_bit(NFS_MIG_FAILED, &server->mig_status))
1207                return -NFS4ERR_IO;
1208
1209        dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
1210                        __func__,
1211                        (unsigned long long)server->fsid.major,
1212                        (unsigned long long)server->fsid.minor,
1213                        clp->cl_hostname);
1214
1215        set_bit(NFS_MIG_IN_TRANSITION,
1216                        &((struct nfs_server *)server)->mig_status);
1217        set_bit(NFS4CLNT_MOVED, &clp->cl_state);
1218
1219        nfs4_schedule_state_manager(clp);
1220        return 0;
1221}
1222EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
1223
1224/**
1225 * nfs4_schedule_lease_moved_recovery - start lease-moved recovery
1226 *
1227 * @clp: server to check for moved leases
1228 *
1229 */
1230void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp)
1231{
1232        dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n",
1233                __func__, clp->cl_clientid, clp->cl_hostname);
1234
1235        set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state);
1236        nfs4_schedule_state_manager(clp);
1237}
1238EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery);
1239
1240int nfs4_wait_clnt_recover(struct nfs_client *clp)
1241{
1242        int res;
1243
1244        might_sleep();
1245
1246        atomic_inc(&clp->cl_count);
1247        res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
1248                                 nfs_wait_bit_killable, TASK_KILLABLE);
1249        if (res)
1250                goto out;
1251        if (clp->cl_cons_state < 0)
1252                res = clp->cl_cons_state;
1253out:
1254        nfs_put_client(clp);
1255        return res;
1256}
1257
1258int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1259{
1260        unsigned int loop;
1261        int ret;
1262
1263        for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
1264                ret = nfs4_wait_clnt_recover(clp);
1265                if (ret != 0)
1266                        break;
1267                if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1268                    !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1269                        break;
1270                nfs4_schedule_state_manager(clp);
1271                ret = -EIO;
1272        }
1273        return ret;
1274}
1275
1276/*
1277 * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
1278 * @clp: client to process
1279 *
1280 * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
1281 * resend of the SETCLIENTID and hence re-establish the
1282 * callback channel. Then return all existing delegations.
1283 */
1284static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1285{
1286        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1287        nfs_expire_all_delegations(clp);
1288        dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
1289                        clp->cl_hostname);
1290}
1291
/* Do the CB_PATH_DOWN handling for @clp, then kick the state manager */
void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
{
	nfs40_handle_cb_pathdown(clp);

	nfs4_schedule_state_manager(clp);
}
1297
1298static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1299{
1300
1301        set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1302        /* Don't recover state that expired before the reboot */
1303        if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
1304                clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1305                return 0;
1306        }
1307        set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags);
1308        set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1309        return 1;
1310}
1311
1312int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1313{
1314        set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1315        clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1316        set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
1317        set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1318        return 1;
1319}
1320
1321int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1322{
1323        struct nfs_client *clp = server->nfs_client;
1324
1325        if (!nfs4_valid_open_stateid(state))
1326                return -EBADF;
1327        nfs4_state_mark_reclaim_nograce(clp, state);
1328        dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1329                        clp->cl_hostname);
1330        nfs4_schedule_state_manager(clp);
1331        return 0;
1332}
1333EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
1334
1335void nfs_inode_find_state_and_recover(struct inode *inode,
1336                const nfs4_stateid *stateid)
1337{
1338        struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1339        struct nfs_inode *nfsi = NFS_I(inode);
1340        struct nfs_open_context *ctx;
1341        struct nfs4_state *state;
1342        bool found = false;
1343
1344        spin_lock(&inode->i_lock);
1345        list_for_each_entry(ctx, &nfsi->open_files, list) {
1346                state = ctx->state;
1347                if (state == NULL)
1348                        continue;
1349                if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
1350                        continue;
1351                if (!nfs4_stateid_match(&state->stateid, stateid))
1352                        continue;
1353                nfs4_state_mark_reclaim_nograce(clp, state);
1354                found = true;
1355        }
1356        spin_unlock(&inode->i_lock);
1357        if (found)
1358                nfs4_schedule_state_manager(clp);
1359}
1360
1361static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
1362{
1363        struct inode *inode = state->inode;
1364        struct nfs_inode *nfsi = NFS_I(inode);
1365        struct nfs_open_context *ctx;
1366
1367        spin_lock(&inode->i_lock);
1368        list_for_each_entry(ctx, &nfsi->open_files, list) {
1369                if (ctx->state != state)
1370                        continue;
1371                set_bit(NFS_CONTEXT_BAD, &ctx->flags);
1372        }
1373        spin_unlock(&inode->i_lock);
1374}
1375
1376static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
1377{
1378        set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
1379        nfs4_state_mark_open_context_bad(state);
1380}
1381
1382
1383static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1384{
1385        struct inode *inode = state->inode;
1386        struct nfs_inode *nfsi = NFS_I(inode);
1387        struct file_lock *fl;
1388        int status = 0;
1389        struct file_lock_context *flctx = inode->i_flctx;
1390        struct list_head *list;
1391
1392        if (flctx == NULL)
1393                return 0;
1394
1395        list = &flctx->flc_posix;
1396
1397        /* Guard against delegation returns and new lock/unlock calls */
1398        down_write(&nfsi->rwsem);
1399        spin_lock(&flctx->flc_lock);
1400restart:
1401        list_for_each_entry(fl, list, fl_list) {
1402                if (nfs_file_open_context(fl->fl_file)->state != state)
1403                        continue;
1404                spin_unlock(&flctx->flc_lock);
1405                status = ops->recover_lock(state, fl);
1406                switch (status) {
1407                case 0:
1408                        break;
1409                case -ESTALE:
1410                case -NFS4ERR_ADMIN_REVOKED:
1411                case -NFS4ERR_STALE_STATEID:
1412                case -NFS4ERR_BAD_STATEID:
1413                case -NFS4ERR_EXPIRED:
1414                case -NFS4ERR_NO_GRACE:
1415                case -NFS4ERR_STALE_CLIENTID:
1416                case -NFS4ERR_BADSESSION:
1417                case -NFS4ERR_BADSLOT:
1418                case -NFS4ERR_BAD_HIGH_SLOT:
1419                case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1420                        goto out;
1421                default:
1422                        pr_err("NFS: %s: unhandled error %d\n",
1423                                        __func__, status);
1424                case -ENOMEM:
1425                case -NFS4ERR_DENIED:
1426                case -NFS4ERR_RECLAIM_BAD:
1427                case -NFS4ERR_RECLAIM_CONFLICT:
1428                        /* kill_proc(fl->fl_pid, SIGLOST, 1); */
1429                        status = 0;
1430                }
1431                spin_lock(&flctx->flc_lock);
1432        }
1433        if (list == &flctx->flc_posix) {
1434                list = &flctx->flc_flock;
1435                goto restart;
1436        }
1437        spin_unlock(&flctx->flc_lock);
1438out:
1439        up_write(&nfsi->rwsem);
1440        return status;
1441}
1442
1443static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
1444{
1445        struct nfs4_state *state;
1446        struct nfs4_lock_state *lock;
1447        int status = 0;
1448
1449        /* Note: we rely on the sp->so_states list being ordered 
1450         * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
1451         * states first.
1452         * This is needed to ensure that the server won't give us any
1453         * read delegations that we have to return if, say, we are
1454         * recovering after a network partition or a reboot from a
1455         * server that doesn't support a grace period.
1456         */
1457        spin_lock(&sp->so_lock);
1458        raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
1459restart:
1460        list_for_each_entry(state, &sp->so_states, open_states) {
1461                if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1462                        continue;
1463                if (!nfs4_valid_open_stateid(state))
1464                        continue;
1465                if (state->state == 0)
1466                        continue;
1467                atomic_inc(&state->count);
1468                spin_unlock(&sp->so_lock);
1469                status = ops->recover_open(sp, state);
1470                if (status >= 0) {
1471                        status = nfs4_reclaim_locks(state, ops);
1472                        if (status >= 0) {
1473                                if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
1474                                        spin_lock(&state->state_lock);
1475                                        list_for_each_entry(lock, &state->lock_states, ls_locks) {
1476                                                if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
1477                                                        pr_warn_ratelimited("NFS: "
1478                                                                            "%s: Lock reclaim "
1479                                                                            "failed!\n", __func__);
1480                                        }
1481                                        spin_unlock(&state->state_lock);
1482                                }
1483                                nfs4_put_open_state(state);
1484                                clear_bit(NFS_STATE_RECLAIM_NOGRACE,
1485                                        &state->flags);
1486                                spin_lock(&sp->so_lock);
1487                                goto restart;
1488                        }
1489                }
1490                switch (status) {
1491                        default:
1492                                printk(KERN_ERR "NFS: %s: unhandled error %d\n",
1493                                        __func__, status);
1494                        case -ENOENT:
1495                        case -ENOMEM:
1496                        case -ESTALE:
1497                                /* Open state on this file cannot be recovered */
1498                                nfs4_state_mark_recovery_failed(state, status);
1499                                break;
1500                        case -EAGAIN:
1501                                ssleep(1);
1502                        case -NFS4ERR_ADMIN_REVOKED:
1503                        case -NFS4ERR_STALE_STATEID:
1504                        case -NFS4ERR_BAD_STATEID:
1505                        case -NFS4ERR_RECLAIM_BAD:
1506                        case -NFS4ERR_RECLAIM_CONFLICT:
1507                                nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1508                                break;
1509                        case -NFS4ERR_EXPIRED:
1510                        case -NFS4ERR_NO_GRACE:
1511                                nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1512                        case -NFS4ERR_STALE_CLIENTID:
1513                        case -NFS4ERR_BADSESSION:
1514                        case -NFS4ERR_BADSLOT:
1515                        case -NFS4ERR_BAD_HIGH_SLOT:
1516                        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1517                                goto out_err;
1518                }
1519                nfs4_put_open_state(state);
1520                spin_lock(&sp->so_lock);
1521                goto restart;
1522        }
1523        raw_write_seqcount_end(&sp->so_reclaim_seqcount);
1524        spin_unlock(&sp->so_lock);
1525        return 0;
1526out_err:
1527        nfs4_put_open_state(state);
1528        spin_lock(&sp->so_lock);
1529        raw_write_seqcount_end(&sp->so_reclaim_seqcount);
1530        spin_unlock(&sp->so_lock);
1531        return status;
1532}
1533
1534static void nfs4_clear_open_state(struct nfs4_state *state)
1535{
1536        struct nfs4_lock_state *lock;
1537
1538        clear_bit(NFS_DELEGATED_STATE, &state->flags);
1539        clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1540        clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1541        clear_bit(NFS_O_RDWR_STATE, &state->flags);
1542        spin_lock(&state->state_lock);
1543        list_for_each_entry(lock, &state->lock_states, ls_locks) {
1544                lock->ls_seqid.flags = 0;
1545                clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags);
1546        }
1547        spin_unlock(&state->state_lock);
1548}
1549
1550static void nfs4_reset_seqids(struct nfs_server *server,
1551        int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1552{
1553        struct nfs_client *clp = server->nfs_client;
1554        struct nfs4_state_owner *sp;
1555        struct rb_node *pos;
1556        struct nfs4_state *state;
1557
1558        spin_lock(&clp->cl_lock);
1559        for (pos = rb_first(&server->state_owners);
1560             pos != NULL;
1561             pos = rb_next(pos)) {
1562                sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1563                sp->so_seqid.flags = 0;
1564                spin_lock(&sp->so_lock);
1565                list_for_each_entry(state, &sp->so_states, open_states) {
1566                        if (mark_reclaim(clp, state))
1567                                nfs4_clear_open_state(state);
1568                }
1569                spin_unlock(&sp->so_lock);
1570        }
1571        spin_unlock(&clp->cl_lock);
1572}
1573
1574static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
1575        int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1576{
1577        struct nfs_server *server;
1578
1579        rcu_read_lock();
1580        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1581                nfs4_reset_seqids(server, mark_reclaim);
1582        rcu_read_unlock();
1583}
1584
1585static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
1586{
1587        /* Mark all delegations for reclaim */
1588        nfs_delegation_mark_reclaim(clp);
1589        nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
1590}
1591
1592static void nfs4_reclaim_complete(struct nfs_client *clp,
1593                                 const struct nfs4_state_recovery_ops *ops,
1594                                 struct rpc_cred *cred)
1595{
1596        /* Notify the server we're done reclaiming our state */
1597        if (ops->reclaim_complete)
1598                (void)ops->reclaim_complete(clp, cred);
1599}
1600
1601static void nfs4_clear_reclaim_server(struct nfs_server *server)
1602{
1603        struct nfs_client *clp = server->nfs_client;
1604        struct nfs4_state_owner *sp;
1605        struct rb_node *pos;
1606        struct nfs4_state *state;
1607
1608        spin_lock(&clp->cl_lock);
1609        for (pos = rb_first(&server->state_owners);
1610             pos != NULL;
1611             pos = rb_next(pos)) {
1612                sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1613                spin_lock(&sp->so_lock);
1614                list_for_each_entry(state, &sp->so_states, open_states) {
1615                        if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
1616                                                &state->flags))
1617                                continue;
1618                        nfs4_state_mark_reclaim_nograce(clp, state);
1619                }
1620                spin_unlock(&sp->so_lock);
1621        }
1622        spin_unlock(&clp->cl_lock);
1623}
1624
1625static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1626{
1627        struct nfs_server *server;
1628
1629        if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1630                return 0;
1631
1632        rcu_read_lock();
1633        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1634                nfs4_clear_reclaim_server(server);
1635        rcu_read_unlock();
1636
1637        nfs_delegation_reap_unclaimed(clp);
1638        return 1;
1639}
1640
1641static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1642{
1643        const struct nfs4_state_recovery_ops *ops;
1644        struct rpc_cred *cred;
1645
1646        if (!nfs4_state_clear_reclaim_reboot(clp))
1647                return;
1648        ops = clp->cl_mvops->reboot_recovery_ops;
1649        cred = nfs4_get_clid_cred(clp);
1650        nfs4_reclaim_complete(clp, ops, cred);
1651        put_rpccred(cred);
1652}
1653
/*
 * Mark every delegation of @clp for reclaim and immediately reap the
 * unclaimed ones.
 */
static void nfs_delegation_clear_all(struct nfs_client *clp)
{
	nfs_delegation_mark_reclaim(clp);

	nfs_delegation_reap_unclaimed(clp);
}
1659
1660static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1661{
1662        nfs_delegation_clear_all(clp);
1663        nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1664}
1665
1666static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1667{
1668        switch (error) {
1669                case 0:
1670                        break;
1671                case -NFS4ERR_CB_PATH_DOWN:
1672                        nfs40_handle_cb_pathdown(clp);
1673                        break;
1674                case -NFS4ERR_NO_GRACE:
1675                        nfs4_state_end_reclaim_reboot(clp);
1676                        break;
1677                case -NFS4ERR_STALE_CLIENTID:
1678                        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1679                        nfs4_state_clear_reclaim_reboot(clp);
1680                        nfs4_state_start_reclaim_reboot(clp);
1681                        break;
1682                case -NFS4ERR_EXPIRED:
1683                        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1684                        nfs4_state_start_reclaim_nograce(clp);
1685                        break;
1686                case -NFS4ERR_BADSESSION:
1687                case -NFS4ERR_BADSLOT:
1688                case -NFS4ERR_BAD_HIGH_SLOT:
1689                case -NFS4ERR_DEADSESSION:
1690                case -NFS4ERR_SEQ_FALSE_RETRY:
1691                case -NFS4ERR_SEQ_MISORDERED:
1692                        set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1693                        /* Zero session reset errors */
1694                        break;
1695                case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1696                        set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1697                        break;
1698                default:
1699                        dprintk("%s: failed to handle error %d for server %s\n",
1700                                        __func__, error, clp->cl_hostname);
1701                        return error;
1702        }
1703        dprintk("%s: handled error %d for server %s\n", __func__, error,
1704                        clp->cl_hostname);
1705        return 0;
1706}
1707
1708static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
1709{
1710        struct nfs4_state_owner *sp;
1711        struct nfs_server *server;
1712        struct rb_node *pos;
1713        int status = 0;
1714
1715restart:
1716        rcu_read_lock();
1717        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1718                nfs4_purge_state_owners(server);
1719                spin_lock(&clp->cl_lock);
1720                for (pos = rb_first(&server->state_owners);
1721                     pos != NULL;
1722                     pos = rb_next(pos)) {
1723                        sp = rb_entry(pos,
1724                                struct nfs4_state_owner, so_server_node);
1725                        if (!test_and_clear_bit(ops->owner_flag_bit,
1726                                                        &sp->so_flags))
1727                                continue;
1728                        if (!atomic_inc_not_zero(&sp->so_count))
1729                                continue;
1730                        spin_unlock(&clp->cl_lock);
1731                        rcu_read_unlock();
1732
1733                        status = nfs4_reclaim_open_state(sp, ops);
1734                        if (status < 0) {
1735                                set_bit(ops->owner_flag_bit, &sp->so_flags);
1736                                nfs4_put_state_owner(sp);
1737                                status = nfs4_recovery_handle_error(clp, status);
1738                                return (status != 0) ? status : -EAGAIN;
1739                        }
1740
1741                        nfs4_put_state_owner(sp);
1742                        goto restart;
1743                }
1744                spin_unlock(&clp->cl_lock);
1745        }
1746        rcu_read_unlock();
1747        return 0;
1748}
1749
1750static int nfs4_check_lease(struct nfs_client *clp)
1751{
1752        struct rpc_cred *cred;
1753        const struct nfs4_state_maintenance_ops *ops =
1754                clp->cl_mvops->state_renewal_ops;
1755        int status;
1756
1757        /* Is the client already known to have an expired lease? */
1758        if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1759                return 0;
1760        spin_lock(&clp->cl_lock);
1761        cred = ops->get_state_renewal_cred_locked(clp);
1762        spin_unlock(&clp->cl_lock);
1763        if (cred == NULL) {
1764                cred = nfs4_get_clid_cred(clp);
1765                status = -ENOKEY;
1766                if (cred == NULL)
1767                        goto out;
1768        }
1769        status = ops->renew_lease(clp, cred);
1770        put_rpccred(cred);
1771        if (status == -ETIMEDOUT) {
1772                set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1773                return 0;
1774        }
1775out:
1776        return nfs4_recovery_handle_error(clp, status);
1777}
1778
1779/* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors
1780 * and for recoverable errors on EXCHANGE_ID for v4.1
1781 */
1782static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1783{
1784        switch (status) {
1785        case -NFS4ERR_SEQ_MISORDERED:
1786                if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
1787                        return -ESERVERFAULT;
1788                /* Lease confirmation error: retry after purging the lease */
1789                ssleep(1);
1790                clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1791                break;
1792        case -NFS4ERR_STALE_CLIENTID:
1793                clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1794                nfs4_state_start_reclaim_reboot(clp);
1795                break;
1796        case -NFS4ERR_CLID_INUSE:
1797                pr_err("NFS: Server %s reports our clientid is in use\n",
1798                        clp->cl_hostname);
1799                nfs_mark_client_ready(clp, -EPERM);
1800                clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1801                return -EPERM;
1802        case -EACCES:
1803        case -NFS4ERR_DELAY:
1804        case -ETIMEDOUT:
1805        case -EAGAIN:
1806                ssleep(1);
1807                break;
1808
1809        case -NFS4ERR_MINOR_VERS_MISMATCH:
1810                if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1811                        nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
1812                dprintk("%s: exit with error %d for server %s\n",
1813                                __func__, -EPROTONOSUPPORT, clp->cl_hostname);
1814                return -EPROTONOSUPPORT;
1815        case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1816                                 * in nfs4_exchange_id */
1817        default:
1818                dprintk("%s: exit with error %d for server %s\n", __func__,
1819                                status, clp->cl_hostname);
1820                return status;
1821        }
1822        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1823        dprintk("%s: handled error %d for server %s\n", __func__, status,
1824                        clp->cl_hostname);
1825        return 0;
1826}
1827
1828static int nfs4_establish_lease(struct nfs_client *clp)
1829{
1830        struct rpc_cred *cred;
1831        const struct nfs4_state_recovery_ops *ops =
1832                clp->cl_mvops->reboot_recovery_ops;
1833        int status;
1834
1835        nfs4_begin_drain_session(clp);
1836        cred = nfs4_get_clid_cred(clp);
1837        if (cred == NULL)
1838                return -ENOENT;
1839        status = ops->establish_clid(clp, cred);
1840        put_rpccred(cred);
1841        if (status != 0)
1842                return status;
1843        pnfs_destroy_all_layouts(clp);
1844        return 0;
1845}
1846
1847/*
1848 * Returns zero or a negative errno.  NFS4ERR values are converted
1849 * to local errno values.
1850 */
1851static int nfs4_reclaim_lease(struct nfs_client *clp)
1852{
1853        int status;
1854
1855        status = nfs4_establish_lease(clp);
1856        if (status < 0)
1857                return nfs4_handle_reclaim_lease_error(clp, status);
1858        if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state))
1859                nfs4_state_start_reclaim_nograce(clp);
1860        if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
1861                set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1862        clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1863        clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1864        return 0;
1865}
1866
1867static int nfs4_purge_lease(struct nfs_client *clp)
1868{
1869        int status;
1870
1871        status = nfs4_establish_lease(clp);
1872        if (status < 0)
1873                return nfs4_handle_reclaim_lease_error(clp, status);
1874        clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1875        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1876        nfs4_state_start_reclaim_nograce(clp);
1877        return 0;
1878}
1879
1880/*
1881 * Try remote migration of one FSID from a source server to a
1882 * destination server.  The source server provides a list of
1883 * potential destinations.
1884 *
1885 * Returns zero or a negative NFS4ERR status code.
1886 */
1887static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
1888{
1889        struct nfs_client *clp = server->nfs_client;
1890        struct nfs4_fs_locations *locations = NULL;
1891        struct inode *inode;
1892        struct page *page;
1893        int status, result;
1894
1895        dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
1896                        (unsigned long long)server->fsid.major,
1897                        (unsigned long long)server->fsid.minor,
1898                        clp->cl_hostname);
1899
1900        result = 0;
1901        page = alloc_page(GFP_KERNEL);
1902        locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
1903        if (page == NULL || locations == NULL) {
1904                dprintk("<-- %s: no memory\n", __func__);
1905                goto out;
1906        }
1907
1908        inode = d_inode(server->super->s_root);
1909        result = nfs4_proc_get_locations(inode, locations, page, cred);
1910        if (result) {
1911                dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
1912                        __func__, result);
1913                goto out;
1914        }
1915
1916        result = -NFS4ERR_NXIO;
1917        if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
1918                dprintk("<-- %s: No fs_locations data, migration skipped\n",
1919                        __func__);
1920                goto out;
1921        }
1922
1923        nfs4_begin_drain_session(clp);
1924
1925        status = nfs4_replace_transport(server, locations);
1926        if (status != 0) {
1927                dprintk("<-- %s: failed to replace transport: %d\n",
1928                        __func__, status);
1929                goto out;
1930        }
1931
1932        result = 0;
1933        dprintk("<-- %s: migration succeeded\n", __func__);
1934
1935out:
1936        if (page != NULL)
1937                __free_page(page);
1938        kfree(locations);
1939        if (result) {
1940                pr_err("NFS: migration recovery failed (server %s)\n",
1941                                clp->cl_hostname);
1942                set_bit(NFS_MIG_FAILED, &server->mig_status);
1943        }
1944        return result;
1945}
1946
1947/*
1948 * Returns zero or a negative NFS4ERR status code.
1949 */
1950static int nfs4_handle_migration(struct nfs_client *clp)
1951{
1952        const struct nfs4_state_maintenance_ops *ops =
1953                                clp->cl_mvops->state_renewal_ops;
1954        struct nfs_server *server;
1955        struct rpc_cred *cred;
1956
1957        dprintk("%s: migration reported on \"%s\"\n", __func__,
1958                        clp->cl_hostname);
1959
1960        spin_lock(&clp->cl_lock);
1961        cred = ops->get_state_renewal_cred_locked(clp);
1962        spin_unlock(&clp->cl_lock);
1963        if (cred == NULL)
1964                return -NFS4ERR_NOENT;
1965
1966        clp->cl_mig_gen++;
1967restart:
1968        rcu_read_lock();
1969        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1970                int status;
1971
1972                if (server->mig_gen == clp->cl_mig_gen)
1973                        continue;
1974                server->mig_gen = clp->cl_mig_gen;
1975
1976                if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
1977                                                &server->mig_status))
1978                        continue;
1979
1980                rcu_read_unlock();
1981                status = nfs4_try_migration(server, cred);
1982                if (status < 0) {
1983                        put_rpccred(cred);
1984                        return status;
1985                }
1986                goto restart;
1987        }
1988        rcu_read_unlock();
1989        put_rpccred(cred);
1990        return 0;
1991}
1992
1993/*
1994 * Test each nfs_server on the clp's cl_superblocks list to see
1995 * if it's moved to another server.  Stop when the server no longer
1996 * returns NFS4ERR_LEASE_MOVED.
1997 */
1998static int nfs4_handle_lease_moved(struct nfs_client *clp)
1999{
2000        const struct nfs4_state_maintenance_ops *ops =
2001                                clp->cl_mvops->state_renewal_ops;
2002        struct nfs_server *server;
2003        struct rpc_cred *cred;
2004
2005        dprintk("%s: lease moved reported on \"%s\"\n", __func__,
2006                        clp->cl_hostname);
2007
2008        spin_lock(&clp->cl_lock);
2009        cred = ops->get_state_renewal_cred_locked(clp);
2010        spin_unlock(&clp->cl_lock);
2011        if (cred == NULL)
2012                return -NFS4ERR_NOENT;
2013
2014        clp->cl_mig_gen++;
2015restart:
2016        rcu_read_lock();
2017        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
2018                struct inode *inode;
2019                int status;
2020
2021                if (server->mig_gen == clp->cl_mig_gen)
2022                        continue;
2023                server->mig_gen = clp->cl_mig_gen;
2024
2025                rcu_read_unlock();
2026
2027                inode = d_inode(server->super->s_root);
2028                status = nfs4_proc_fsid_present(inode, cred);
2029                if (status != -NFS4ERR_MOVED)
2030                        goto restart;   /* wasn't this one */
2031                if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED)
2032                        goto restart;   /* there are more */
2033                goto out;
2034        }
2035        rcu_read_unlock();
2036
2037out:
2038        put_rpccred(cred);
2039        return 0;
2040}
2041
2042/**
2043 * nfs4_discover_server_trunking - Detect server IP address trunking
2044 *
2045 * @clp: nfs_client under test
2046 * @result: OUT: found nfs_client, or clp
2047 *
2048 * Returns zero or a negative errno.  If zero is returned,
2049 * an nfs_client pointer is planted in "result".
2050 *
2051 * Note: since we are invoked in process context, and
2052 * not from inside the state manager, we cannot use
2053 * nfs4_handle_reclaim_lease_error().
2054 */
2055int nfs4_discover_server_trunking(struct nfs_client *clp,
2056                                  struct nfs_client **result)
2057{
2058        const struct nfs4_state_recovery_ops *ops =
2059                                clp->cl_mvops->reboot_recovery_ops;
2060        struct rpc_clnt *clnt;
2061        struct rpc_cred *cred;
2062        int i, status;
2063
2064        dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
2065
2066        clnt = clp->cl_rpcclient;
            /* i counts -EACCES results; only the first one gets a plain retry */
2067        i = 0;
2068
            /* Every attempt below runs with nfs_clid_init_mutex held */
2069        mutex_lock(&nfs_clid_init_mutex);
2070again:
2071        status  = -ENOENT;
2072        cred = nfs4_get_clid_cred(clp);
2073        if (cred == NULL)
2074                goto out_unlock;
2075
            /* The cred is only needed for the call itself; drop it right after */
2076        status = ops->detect_trunking(clp, result, cred);
2077        put_rpccred(cred);
2078        switch (status) {
2079        case 0:
2080                break;
2081        case -ETIMEDOUT:
                    /* Give up on a timeout only if cl_softrtry is set */
2082                if (clnt->cl_softrtry)
2083                        break;
                    /* fall through: otherwise wait and retry */
2084        case -NFS4ERR_DELAY:
2085        case -EAGAIN:
2086                ssleep(1);
                    /* fall through */
2087        case -NFS4ERR_STALE_CLIENTID:
                    /* Entered directly, this case retries without sleeping */
2088                dprintk("NFS: %s after status %d, retrying\n",
2089                        __func__, status);
2090                goto again;
2091        case -EACCES:
                    /* First -EACCES: call nfs4_root_machine_cred() and retry */
2092                if (i++ == 0) {
2093                        nfs4_root_machine_cred(clp);
2094                        goto again;
2095                }
                    /* Later -EACCES: return it if the flavor is already AUTH_UNIX */
2096                if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
2097                        break;
                    /* fall through: otherwise try switching to AUTH_UNIX */
2098        case -NFS4ERR_CLID_INUSE:
2099        case -NFS4ERR_WRONGSEC:
2100                /* No point in retrying if we already used RPC_AUTH_UNIX */
2101                if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) {
2102                        status = -EPERM;
2103                        break;
2104                }
2105                clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
2106                if (IS_ERR(clnt)) {
2107                        status = PTR_ERR(clnt);
2108                        break;
2109                }
2110                /* Note: this is safe because we haven't yet marked the
2111                 * client as ready, so we are the only user of
2112                 * clp->cl_rpcclient
2113                 */
                    /* Install the clone, shut the old client down, then retry */
2114                clnt = xchg(&clp->cl_rpcclient, clnt);
2115                rpc_shutdown_client(clnt);
2116                clnt = clp->cl_rpcclient;
2117                goto again;
2118
2119        case -NFS4ERR_MINOR_VERS_MISMATCH:
2120                status = -EPROTONOSUPPORT;
2121                break;
2122
2123        case -EKEYEXPIRED:
2124        case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
2125                                 * in nfs4_exchange_id */
2126                status = -EKEYEXPIRED;
2127                break;
2128        default:
                    /* Anything not listed above is reported to the caller as -EIO */
2129                pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n",
2130                                __func__, status);
2131                status = -EIO;
2132        }
2133
2134out_unlock:
2135        mutex_unlock(&nfs_clid_init_mutex);
2136        dprintk("NFS: %s: status = %d\n", __func__, status);
2137        return status;
2138}
2139
2140#ifdef CONFIG_NFS_V4_1
2141void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
2142{
2143        struct nfs_client *clp = session->clp;
2144
2145        switch (err) {
2146        default:
2147                set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2148                break;
2149        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
2150                set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2151        }
2152        nfs4_schedule_lease_recovery(clp);
2153}
2154EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
2155
2156void nfs41_notify_server(struct nfs_client *clp)
2157{
2158        /* Use CHECK_LEASE to ping the server with a SEQUENCE */
2159        set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
2160        nfs4_schedule_state_manager(clp);
2161}
2162
2163static void nfs4_reset_all_state(struct nfs_client *clp)
2164{
2165        if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
2166                set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
2167                clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
2168                nfs4_state_start_reclaim_nograce(clp);
2169                dprintk("%s: scheduling reset of all state for server %s!\n",
2170                                __func__, clp->cl_hostname);
2171                nfs4_schedule_state_manager(clp);
2172        }
2173}
2174
2175static void nfs41_handle_server_reboot(struct nfs_client *clp)
2176{
2177        if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
2178                nfs4_state_start_reclaim_reboot(clp);
2179                dprintk("%s: server %s rebooted!\n", __func__,
2180                                clp->cl_hostname);
2181                nfs4_schedule_state_manager(clp);
2182        }
2183}
2184
2185static void nfs41_handle_all_state_revoked(struct nfs_client *clp)
2186{
2187        nfs4_reset_all_state(clp);
2188        dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
2189}
2190
2191static void nfs41_handle_some_state_revoked(struct nfs_client *clp)
2192{
2193        nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
2194        nfs4_schedule_state_manager(clp);
2195
2196        dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
2197}
2198
2199static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
2200{
2201        /* FIXME: For now, we destroy all layouts. */
2202        pnfs_destroy_all_layouts(clp);
2203        /* FIXME: For now, we test all delegations+open state+locks. */
2204        nfs41_handle_some_state_revoked(clp);
2205        dprintk("%s: Recallable state revoked on server %s!\n", __func__,
2206                        clp->cl_hostname);
2207}
2208
2209static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
2210{
2211        set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2212        nfs4_schedule_state_manager(clp);
2213
2214        dprintk("%s: server %s declared a backchannel fault\n", __func__,
2215                        clp->cl_hostname);
2216}
2217
2218static void nfs41_handle_cb_path_down(struct nfs_client *clp)
2219{
2220        if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
2221                &clp->cl_state) == 0)
2222                nfs4_schedule_state_manager(clp);
2223}
2224
2225void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
2226{
2227        if (!flags)
2228                return;
2229
2230        dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
2231                __func__, clp->cl_hostname, clp->cl_clientid, flags);
2232
2233        if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
2234                nfs41_handle_server_reboot(clp);
2235        if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED))
2236                nfs41_handle_all_state_revoked(clp);
2237        if (flags & (SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
2238                            SEQ4_STATUS_ADMIN_STATE_REVOKED))
2239                nfs41_handle_some_state_revoked(clp);
2240        if (flags & SEQ4_STATUS_LEASE_MOVED)
2241                nfs4_schedule_lease_moved_recovery(clp);
2242        if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
2243                nfs41_handle_recallable_state_revoked(clp);
2244        if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
2245                nfs41_handle_backchannel_fault(clp);
2246        else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
2247                                SEQ4_STATUS_CB_PATH_DOWN_SESSION))
2248                nfs41_handle_cb_path_down(clp);
2249}
2250
2251static int nfs4_reset_session(struct nfs_client *clp)
2252{
2253        struct rpc_cred *cred;
2254        int status;
2255
2256        if (!nfs4_has_session(clp))
2257                return 0;
2258        nfs4_begin_drain_session(clp);
2259        cred = nfs4_get_clid_cred(clp);
2260        status = nfs4_proc_destroy_session(clp->cl_session, cred);
2261        switch (status) {
2262        case 0:
2263        case -NFS4ERR_BADSESSION:
2264        case -NFS4ERR_DEADSESSION:
2265                break;
2266        case -NFS4ERR_BACK_CHAN_BUSY:
2267        case -NFS4ERR_DELAY:
2268                set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2269                status = 0;
2270                ssleep(1);
2271                goto out;
2272        default:
2273                status = nfs4_recovery_handle_error(clp, status);
2274                goto out;
2275        }
2276
2277        memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
2278        status = nfs4_proc_create_session(clp, cred);
2279        if (status) {
2280                dprintk("%s: session reset failed with status %d for server %s!\n",
2281                        __func__, status, clp->cl_hostname);
2282                status = nfs4_handle_reclaim_lease_error(clp, status);
2283                goto out;
2284        }
2285        nfs41_finish_session_reset(clp);
2286        dprintk("%s: session reset was successful for server %s!\n",
2287                        __func__, clp->cl_hostname);
2288out:
2289        if (cred)
2290                put_rpccred(cred);
2291        return status;
2292}
2293
2294static int nfs4_bind_conn_to_session(struct nfs_client *clp)
2295{
2296        struct rpc_cred *cred;
2297        int ret;
2298
2299        if (!nfs4_has_session(clp))
2300                return 0;
2301        nfs4_begin_drain_session(clp);
2302        cred = nfs4_get_clid_cred(clp);
2303        ret = nfs4_proc_bind_conn_to_session(clp, cred);
2304        if (cred)
2305                put_rpccred(cred);
2306        clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2307        switch (ret) {
2308        case 0:
2309                dprintk("%s: bind_conn_to_session was successful for server %s!\n",
2310                        __func__, clp->cl_hostname);
2311                break;
2312        case -NFS4ERR_DELAY:
2313                ssleep(1);
2314                set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2315                break;
2316        default:
2317                return nfs4_recovery_handle_error(clp, ret);
2318        }
2319        return 0;
2320}
2321#else /* CONFIG_NFS_V4_1 */
/* Stub used when CONFIG_NFS_V4_1 is not set: always reports success. */
static int nfs4_reset_session(struct nfs_client *clp)
{
	return 0;
}
2323
/* Stub used when CONFIG_NFS_V4_1 is not set: always reports success. */
static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
	return 0;
}
2328#endif /* CONFIG_NFS_V4_1 */
2329
/*
 * State manager main loop.
 *
 * Repeatedly inspects the NFS4CLNT_* bits in clp->cl_state and runs the
 * matching recovery step, in the fixed order below.  The loop ends when
 * cl_state is zero after the manager bit has been cleared, when
 * NFS4CLNT_MANAGER_RUNNING was already set again (presumably another
 * manager took over - confirm against the scheduling code), or when
 * clp->cl_count is no longer greater than 1.
 *
 * Note that every "continue" inside the do/while jumps to the loop
 * condition at the bottom, so the cl_count check also applies to it.
 *
 * On an unrecoverable step failure a rate-limited warning naming the
 * failed section is printed, the thread sleeps for one second, the
 * session drain is ended and the manager bit is cleared.
 */
2330static void nfs4_state_manager(struct nfs_client *clp)
2331{
2332        int status = 0;
            /* section names the step in progress, for the failure message */
2333        const char *section = "", *section_sep = "";
2334
2335        /* Ensure exclusive access to NFSv4 state */
2336        do {
2337                if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
2338                        section = "purge state";
2339                        status = nfs4_purge_lease(clp);
2340                        if (status < 0)
2341                                goto out_error;
2342                        continue;
2343                }
2344
2345                if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
2346                        section = "lease expired";
2347                        /* We're going to have to re-establish a clientid */
2348                        status = nfs4_reclaim_lease(clp);
2349                        if (status < 0)
2350                                goto out_error;
2351                        continue;
2352                }
2353
2354                /* Initialize or reset the session */
2355                if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) {
2356                        section = "reset session";
2357                        status = nfs4_reset_session(clp);
                            /* An expired lease takes priority over the reset's status */
2358                        if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
2359                                continue;
2360                        if (status < 0)
2361                                goto out_error;
2362                }
2363
2364                /* Send BIND_CONN_TO_SESSION */
2365                if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
2366                                &clp->cl_state)) {
2367                        section = "bind conn to session";
2368                        status = nfs4_bind_conn_to_session(clp);
2369                        if (status < 0)
2370                                goto out_error;
2371                        continue;
2372                }
2373
2374                if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
2375                        section = "check lease";
2376                        status = nfs4_check_lease(clp);
2377                        if (status < 0)
2378                                goto out_error;
2379                        continue;
2380                }
2381
                    /* The two migration steps below do not restart the loop on success */
2382                if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
2383                        section = "migration";
2384                        status = nfs4_handle_migration(clp);
2385                        if (status < 0)
2386                                goto out_error;
2387                }
2388
2389                if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) {
2390                        section = "lease moved";
2391                        status = nfs4_handle_lease_moved(clp);
2392                        if (status < 0)
2393                                goto out_error;
2394                }
2395
2396                /* First recover reboot state... */
2397                if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
2398                        section = "reclaim reboot";
2399                        status = nfs4_do_reclaim(clp,
2400                                clp->cl_mvops->reboot_recovery_ops);
                            /* -EAGAIN: restart the loop from the top */
2401                        if (status == -EAGAIN)
2402                                continue;
2403                        if (status < 0)
2404                                goto out_error;
2405                        nfs4_state_end_reclaim_reboot(clp);
2406                }
2407
2408                /* Now recover expired state... */
2409                if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
2410                        section = "reclaim nograce";
2411                        status = nfs4_do_reclaim(clp,
2412                                clp->cl_mvops->nograce_recovery_ops);
2413                        if (status == -EAGAIN)
2414                                continue;
2415                        if (status < 0)
2416                                goto out_error;
2417                }
2418
                    /* Recovery steps are done: end the session drain */
2419                nfs4_end_drain_session(clp);
2420                if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
2421                        nfs_client_return_marked_delegations(clp);
2422                        continue;
2423                }
2424
2425                nfs4_clear_state_manager_bit(clp);
2426                /* Did we race with an attempt to give us more work? */
2427                if (clp->cl_state == 0)
2428                        break;
                    /* More work is pending: retake the manager bit, or stop if it is already set */
2429                if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
2430                        break;
2431        } while (atomic_read(&clp->cl_count) > 1);
2432        return;
2433out_error:
            /* Add the ": " separator only when a section name was recorded */
2434        if (strlen(section))
2435                section_sep = ": ";
2436        pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
2437                        " with error %d\n", section_sep, section,
2438                        clp->cl_hostname, -status);
2439        ssleep(1);
2440        nfs4_end_drain_session(clp);
2441        nfs4_clear_state_manager_bit(clp);
2442}
2443
2444static int nfs4_run_state_manager(void *ptr)
2445{
2446        struct nfs_client *clp = ptr;
2447
2448        allow_signal(SIGKILL);
2449        nfs4_state_manager(clp);
2450        nfs_put_client(clp);
2451        module_put_and_exit(0);
2452        return 0;
2453}
2454
2455/*
2456 * Local variables:
2457 *  c-basic-offset: 8
2458 * End:
2459 */
2460