linux/fs/nfsd/nfs4state.c
<<
>>
Prefs
   1/*
   2*  linux/fs/nfsd/nfs4state.c
   3*
   4*  Copyright (c) 2001 The Regents of the University of Michigan.
   5*  All rights reserved.
   6*
   7*  Kendrick Smith <kmsmith@umich.edu>
   8*  Andy Adamson <kandros@umich.edu>
   9*
  10*  Redistribution and use in source and binary forms, with or without
  11*  modification, are permitted provided that the following conditions
  12*  are met:
  13*
  14*  1. Redistributions of source code must retain the above copyright
  15*     notice, this list of conditions and the following disclaimer.
  16*  2. Redistributions in binary form must reproduce the above copyright
  17*     notice, this list of conditions and the following disclaimer in the
  18*     documentation and/or other materials provided with the distribution.
  19*  3. Neither the name of the University nor the names of its
  20*     contributors may be used to endorse or promote products derived
  21*     from this software without specific prior written permission.
  22*
  23*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  24*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  25*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  26*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  27*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  28*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  29*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  30*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  31*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  32*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34*
  35*/
  36
  37#include <linux/param.h>
  38#include <linux/major.h>
  39#include <linux/slab.h>
  40
  41#include <linux/sunrpc/svc.h>
  42#include <linux/nfsd/nfsd.h>
  43#include <linux/nfsd/cache.h>
  44#include <linux/file.h>
  45#include <linux/mount.h>
  46#include <linux/workqueue.h>
  47#include <linux/smp_lock.h>
  48#include <linux/kthread.h>
  49#include <linux/nfs4.h>
  50#include <linux/nfsd/state.h>
  51#include <linux/nfsd/xdr4.h>
  52#include <linux/namei.h>
  53#include <linux/swap.h>
  54#include <linux/mutex.h>
  55#include <linux/lockd/bind.h>
  56#include <linux/module.h>
  57#include <linux/sunrpc/svcauth_gss.h>
  58#include <linux/sunrpc/clnt.h>
  59
  60#define NFSDDBG_FACILITY                NFSDDBG_PROC
  61
  62/* Globals */
  63static time_t lease_time = 90;     /* default lease time */
  64static time_t user_lease_time = 90;
  65static time_t boot_time;
  66static u32 current_ownerid = 1;
  67static u32 current_fileid = 1;
  68static u32 current_delegid = 1;
  69static u32 nfs4_init;
  70static stateid_t zerostateid;             /* bits all 0 */
  71static stateid_t onestateid;              /* bits all 1 */
  72static u64 current_sessionid = 1;
  73
  74#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
  75#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
  76
  77/* forward declarations */
  78static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
  79static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
  80static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
  81static void nfs4_set_recdir(char *recdir);
  82
  83/* Locking: */
  84
  85/* Currently used for almost all code touching nfsv4 state: */
  86static DEFINE_MUTEX(client_mutex);
  87
  88/*
  89 * Currently used for the del_recall_lru and file hash table.  In an
  90 * effort to decrease the scope of the client_mutex, this spinlock may
  91 * eventually cover more:
  92 */
  93static DEFINE_SPINLOCK(recall_lock);
  94
  95static struct kmem_cache *stateowner_slab = NULL;
  96static struct kmem_cache *file_slab = NULL;
  97static struct kmem_cache *stateid_slab = NULL;
  98static struct kmem_cache *deleg_slab = NULL;
  99
 100void
 101nfs4_lock_state(void)
 102{
 103        mutex_lock(&client_mutex);
 104}
 105
 106void
 107nfs4_unlock_state(void)
 108{
 109        mutex_unlock(&client_mutex);
 110}
 111
 112static inline u32
 113opaque_hashval(const void *ptr, int nbytes)
 114{
 115        unsigned char *cptr = (unsigned char *) ptr;
 116
 117        u32 x = 0;
 118        while (nbytes--) {
 119                x *= 37;
 120                x += *cptr++;
 121        }
 122        return x;
 123}
 124
 125static struct list_head del_recall_lru;
 126
 127static inline void
 128put_nfs4_file(struct nfs4_file *fi)
 129{
 130        if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
 131                list_del(&fi->fi_hash);
 132                spin_unlock(&recall_lock);
 133                iput(fi->fi_inode);
 134                kmem_cache_free(file_slab, fi);
 135        }
 136}
 137
 138static inline void
 139get_nfs4_file(struct nfs4_file *fi)
 140{
 141        atomic_inc(&fi->fi_ref);
 142}
 143
 144static int num_delegations;
 145unsigned int max_delegations;
 146
 147/*
 148 * Open owner state (share locks)
 149 */
 150
 151/* hash tables for nfs4_stateowner */
 152#define OWNER_HASH_BITS              8
 153#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
 154#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)
 155
 156#define ownerid_hashval(id) \
 157        ((id) & OWNER_HASH_MASK)
 158#define ownerstr_hashval(clientid, ownername) \
 159        (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
 160
 161static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
 162static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
 163
 164/* hash table for nfs4_file */
 165#define FILE_HASH_BITS                   8
 166#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
 167#define FILE_HASH_MASK                  (FILE_HASH_SIZE - 1)
 168/* hash table for (open)nfs4_stateid */
 169#define STATEID_HASH_BITS              10
 170#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
 171#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
 172
 173#define file_hashval(x) \
 174        hash_ptr(x, FILE_HASH_BITS)
 175#define stateid_hashval(owner_id, file_id)  \
 176        (((owner_id) + (file_id)) & STATEID_HASH_MASK)
 177
 178static struct list_head file_hashtbl[FILE_HASH_SIZE];
 179static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
 180
 181static struct nfs4_delegation *
 182alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
 183{
 184        struct nfs4_delegation *dp;
 185        struct nfs4_file *fp = stp->st_file;
 186        struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
 187
 188        dprintk("NFSD alloc_init_deleg\n");
 189        if (fp->fi_had_conflict)
 190                return NULL;
 191        if (num_delegations > max_delegations)
 192                return NULL;
 193        dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
 194        if (dp == NULL)
 195                return dp;
 196        num_delegations++;
 197        INIT_LIST_HEAD(&dp->dl_perfile);
 198        INIT_LIST_HEAD(&dp->dl_perclnt);
 199        INIT_LIST_HEAD(&dp->dl_recall_lru);
 200        dp->dl_client = clp;
 201        get_nfs4_file(fp);
 202        dp->dl_file = fp;
 203        dp->dl_flock = NULL;
 204        get_file(stp->st_vfs_file);
 205        dp->dl_vfs_file = stp->st_vfs_file;
 206        dp->dl_type = type;
 207        dp->dl_ident = cb->cb_ident;
 208        dp->dl_stateid.si_boot = get_seconds();
 209        dp->dl_stateid.si_stateownerid = current_delegid++;
 210        dp->dl_stateid.si_fileid = 0;
 211        dp->dl_stateid.si_generation = 0;
 212        fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
 213        dp->dl_time = 0;
 214        atomic_set(&dp->dl_count, 1);
 215        list_add(&dp->dl_perfile, &fp->fi_delegations);
 216        list_add(&dp->dl_perclnt, &clp->cl_delegations);
 217        return dp;
 218}
 219
 220void
 221nfs4_put_delegation(struct nfs4_delegation *dp)
 222{
 223        if (atomic_dec_and_test(&dp->dl_count)) {
 224                dprintk("NFSD: freeing dp %p\n",dp);
 225                put_nfs4_file(dp->dl_file);
 226                kmem_cache_free(deleg_slab, dp);
 227                num_delegations--;
 228        }
 229}
 230
 231/* Remove the associated file_lock first, then remove the delegation.
 232 * lease_modify() is called to remove the FS_LEASE file_lock from
 233 * the i_flock list, eventually calling nfsd's lock_manager
 234 * fl_release_callback.
 235 */
 236static void
 237nfs4_close_delegation(struct nfs4_delegation *dp)
 238{
 239        struct file *filp = dp->dl_vfs_file;
 240
 241        dprintk("NFSD: close_delegation dp %p\n",dp);
 242        dp->dl_vfs_file = NULL;
 243        /* The following nfsd_close may not actually close the file,
 244         * but we want to remove the lease in any case. */
 245        if (dp->dl_flock)
 246                vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
 247        nfsd_close(filp);
 248}
 249
 250/* Called under the state lock. */
 251static void
 252unhash_delegation(struct nfs4_delegation *dp)
 253{
 254        list_del_init(&dp->dl_perfile);
 255        list_del_init(&dp->dl_perclnt);
 256        spin_lock(&recall_lock);
 257        list_del_init(&dp->dl_recall_lru);
 258        spin_unlock(&recall_lock);
 259        nfs4_close_delegation(dp);
 260        nfs4_put_delegation(dp);
 261}
 262
 263/* 
 264 * SETCLIENTID state 
 265 */
 266
 267/* Hash tables for nfs4_clientid state */
 268#define CLIENT_HASH_BITS                 4
 269#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
 270#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
 271
 272#define clientid_hashval(id) \
 273        ((id) & CLIENT_HASH_MASK)
 274#define clientstr_hashval(name) \
 275        (opaque_hashval((name), 8) & CLIENT_HASH_MASK)
 276/*
 277 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
 278 * used in reboot/reset lease grace period processing
 279 *
 280 * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
 281 * setclientid_confirmed info. 
 282 *
 283 * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
 284 * setclientid info.
 285 *
 286 * client_lru holds client queue ordered by nfs4_client.cl_time
 287 * for lease renewal.
 288 *
 289 * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
 290 * for last close replay.
 291 */
 292static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE];
 293static int reclaim_str_hashtbl_size = 0;
 294static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE];
 295static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE];
 296static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE];
 297static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
 298static struct list_head client_lru;
 299static struct list_head close_lru;
 300
 301static void unhash_generic_stateid(struct nfs4_stateid *stp)
 302{
 303        list_del(&stp->st_hash);
 304        list_del(&stp->st_perfile);
 305        list_del(&stp->st_perstateowner);
 306}
 307
 308static void free_generic_stateid(struct nfs4_stateid *stp)
 309{
 310        put_nfs4_file(stp->st_file);
 311        kmem_cache_free(stateid_slab, stp);
 312}
 313
 314static void release_lock_stateid(struct nfs4_stateid *stp)
 315{
 316        unhash_generic_stateid(stp);
 317        locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner);
 318        free_generic_stateid(stp);
 319}
 320
 321static void unhash_lockowner(struct nfs4_stateowner *sop)
 322{
 323        struct nfs4_stateid *stp;
 324
 325        list_del(&sop->so_idhash);
 326        list_del(&sop->so_strhash);
 327        list_del(&sop->so_perstateid);
 328        while (!list_empty(&sop->so_stateids)) {
 329                stp = list_first_entry(&sop->so_stateids,
 330                                struct nfs4_stateid, st_perstateowner);
 331                release_lock_stateid(stp);
 332        }
 333}
 334
 335static void release_lockowner(struct nfs4_stateowner *sop)
 336{
 337        unhash_lockowner(sop);
 338        nfs4_put_stateowner(sop);
 339}
 340
 341static void
 342release_stateid_lockowners(struct nfs4_stateid *open_stp)
 343{
 344        struct nfs4_stateowner *lock_sop;
 345
 346        while (!list_empty(&open_stp->st_lockowners)) {
 347                lock_sop = list_entry(open_stp->st_lockowners.next,
 348                                struct nfs4_stateowner, so_perstateid);
 349                /* list_del(&open_stp->st_lockowners);  */
 350                BUG_ON(lock_sop->so_is_open_owner);
 351                release_lockowner(lock_sop);
 352        }
 353}
 354
 355static void release_open_stateid(struct nfs4_stateid *stp)
 356{
 357        unhash_generic_stateid(stp);
 358        release_stateid_lockowners(stp);
 359        nfsd_close(stp->st_vfs_file);
 360        free_generic_stateid(stp);
 361}
 362
 363static void unhash_openowner(struct nfs4_stateowner *sop)
 364{
 365        struct nfs4_stateid *stp;
 366
 367        list_del(&sop->so_idhash);
 368        list_del(&sop->so_strhash);
 369        list_del(&sop->so_perclient);
 370        list_del(&sop->so_perstateid); /* XXX: necessary? */
 371        while (!list_empty(&sop->so_stateids)) {
 372                stp = list_first_entry(&sop->so_stateids,
 373                                struct nfs4_stateid, st_perstateowner);
 374                release_open_stateid(stp);
 375        }
 376}
 377
 378static void release_openowner(struct nfs4_stateowner *sop)
 379{
 380        unhash_openowner(sop);
 381        list_del(&sop->so_close_lru);
 382        nfs4_put_stateowner(sop);
 383}
 384
 385static DEFINE_SPINLOCK(sessionid_lock);
 386#define SESSION_HASH_SIZE       512
 387static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
 388
 389static inline int
 390hash_sessionid(struct nfs4_sessionid *sessionid)
 391{
 392        struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
 393
 394        return sid->sequence % SESSION_HASH_SIZE;
 395}
 396
 397static inline void
 398dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
 399{
 400        u32 *ptr = (u32 *)(&sessionid->data[0]);
 401        dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
 402}
 403
 404static void
 405gen_sessionid(struct nfsd4_session *ses)
 406{
 407        struct nfs4_client *clp = ses->se_client;
 408        struct nfsd4_sessionid *sid;
 409
 410        sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
 411        sid->clientid = clp->cl_clientid;
 412        sid->sequence = current_sessionid++;
 413        sid->reserved = 0;
 414}
 415
 416/*
 417 * The protocol defines ca_maxresponssize_cached to include the size of
 418 * the rpc header, but all we need to cache is the data starting after
 419 * the end of the initial SEQUENCE operation--the rest we regenerate
 420 * each time.  Therefore we can advertise a ca_maxresponssize_cached
 421 * value that is the number of bytes in our cache plus a few additional
 422 * bytes.  In order to stay on the safe side, and not promise more than
 423 * we can cache, those additional bytes must be the minimum possible: 24
 424 * bytes of rpc header (xid through accept state, with AUTH_NULL
 425 * verifier), 12 for the compound header (with zero-length tag), and 44
 426 * for the SEQUENCE op response:
 427 */
 428#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 429
 430/*
 431 * Give the client the number of ca_maxresponsesize_cached slots it
 432 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE,
 433 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more
 434 * than NFSD_MAX_SLOTS_PER_SESSION.
 435 *
 436 * If we run out of reserved DRC memory we should (up to a point)
 437 * re-negotiate active sessions and reduce their slot usage to make
 438 * rooom for new connections. For now we just fail the create session.
 439 */
 440static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
 441{
 442        int mem, size = fchan->maxresp_cached;
 443
 444        if (fchan->maxreqs < 1)
 445                return nfserr_inval;
 446
 447        if (size < NFSD_MIN_HDR_SEQ_SZ)
 448                size = NFSD_MIN_HDR_SEQ_SZ;
 449        size -= NFSD_MIN_HDR_SEQ_SZ;
 450        if (size > NFSD_SLOT_CACHE_SIZE)
 451                size = NFSD_SLOT_CACHE_SIZE;
 452
 453        /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
 454        mem = fchan->maxreqs * size;
 455        if (mem > NFSD_MAX_MEM_PER_SESSION) {
 456                fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
 457                if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
 458                        fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
 459                mem = fchan->maxreqs * size;
 460        }
 461
 462        spin_lock(&nfsd_drc_lock);
 463        /* bound the total session drc memory ussage */
 464        if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
 465                fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
 466                mem = fchan->maxreqs * size;
 467        }
 468        nfsd_drc_mem_used += mem;
 469        spin_unlock(&nfsd_drc_lock);
 470
 471        if (fchan->maxreqs == 0)
 472                return nfserr_serverfault;
 473
 474        fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
 475        return 0;
 476}
 477
 478/*
 479 * fchan holds the client values on input, and the server values on output
 480 */
 481static int init_forechannel_attrs(struct svc_rqst *rqstp,
 482                                  struct nfsd4_channel_attrs *session_fchan,
 483                                  struct nfsd4_channel_attrs *fchan)
 484{
 485        int status = 0;
 486        __u32   maxcount = svc_max_payload(rqstp);
 487
 488        /* headerpadsz set to zero in encode routine */
 489
 490        /* Use the client's max request and max response size if possible */
 491        if (fchan->maxreq_sz > maxcount)
 492                fchan->maxreq_sz = maxcount;
 493        session_fchan->maxreq_sz = fchan->maxreq_sz;
 494
 495        if (fchan->maxresp_sz > maxcount)
 496                fchan->maxresp_sz = maxcount;
 497        session_fchan->maxresp_sz = fchan->maxresp_sz;
 498
 499        /* Use the client's maxops if possible */
 500        if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
 501                fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
 502        session_fchan->maxops = fchan->maxops;
 503
 504        /* FIXME: Error means no more DRC pages so the server should
 505         * recover pages from existing sessions. For now fail session
 506         * creation.
 507         */
 508        status = set_forechannel_drc_size(fchan);
 509
 510        session_fchan->maxresp_cached = fchan->maxresp_cached;
 511        session_fchan->maxreqs = fchan->maxreqs;
 512
 513        dprintk("%s status %d\n", __func__, status);
 514        return status;
 515}
 516
 517static void
 518free_session_slots(struct nfsd4_session *ses)
 519{
 520        int i;
 521
 522        for (i = 0; i < ses->se_fchannel.maxreqs; i++)
 523                kfree(ses->se_slots[i]);
 524}
 525
 526static int
 527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 528                   struct nfsd4_create_session *cses)
 529{
 530        struct nfsd4_session *new, tmp;
 531        struct nfsd4_slot *sp;
 532        int idx, slotsize, cachesize, i;
 533        int status;
 534
 535        memset(&tmp, 0, sizeof(tmp));
 536
 537        /* FIXME: For now, we just accept the client back channel attributes. */
 538        tmp.se_bchannel = cses->back_channel;
 539        status = init_forechannel_attrs(rqstp, &tmp.se_fchannel,
 540                                        &cses->fore_channel);
 541        if (status)
 542                goto out;
 543
 544        BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
 545                     + sizeof(struct nfsd4_session) > PAGE_SIZE);
 546
 547        status = nfserr_serverfault;
 548        /* allocate struct nfsd4_session and slot table pointers in one piece */
 549        slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
 550        new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
 551        if (!new)
 552                goto out;
 553
 554        memcpy(new, &tmp, sizeof(*new));
 555
 556        /* allocate each struct nfsd4_slot and data cache in one piece */
 557        cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
 558        for (i = 0; i < new->se_fchannel.maxreqs; i++) {
 559                sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
 560                if (!sp)
 561                        goto out_free;
 562                new->se_slots[i] = sp;
 563        }
 564
 565        new->se_client = clp;
 566        gen_sessionid(new);
 567        idx = hash_sessionid(&new->se_sessionid);
 568        memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
 569               NFS4_MAX_SESSIONID_LEN);
 570
 571        new->se_flags = cses->flags;
 572        kref_init(&new->se_ref);
 573        spin_lock(&sessionid_lock);
 574        list_add(&new->se_hash, &sessionid_hashtbl[idx]);
 575        list_add(&new->se_perclnt, &clp->cl_sessions);
 576        spin_unlock(&sessionid_lock);
 577
 578        status = nfs_ok;
 579out:
 580        return status;
 581out_free:
 582        free_session_slots(new);
 583        kfree(new);
 584        goto out;
 585}
 586
 587/* caller must hold sessionid_lock */
 588static struct nfsd4_session *
 589find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
 590{
 591        struct nfsd4_session *elem;
 592        int idx;
 593
 594        dump_sessionid(__func__, sessionid);
 595        idx = hash_sessionid(sessionid);
 596        dprintk("%s: idx is %d\n", __func__, idx);
 597        /* Search in the appropriate list */
 598        list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
 599                dump_sessionid("list traversal", &elem->se_sessionid);
 600                if (!memcmp(elem->se_sessionid.data, sessionid->data,
 601                            NFS4_MAX_SESSIONID_LEN)) {
 602                        return elem;
 603                }
 604        }
 605
 606        dprintk("%s: session not found\n", __func__);
 607        return NULL;
 608}
 609
 610/* caller must hold sessionid_lock */
 611static void
 612unhash_session(struct nfsd4_session *ses)
 613{
 614        list_del(&ses->se_hash);
 615        list_del(&ses->se_perclnt);
 616}
 617
 618static void
 619release_session(struct nfsd4_session *ses)
 620{
 621        spin_lock(&sessionid_lock);
 622        unhash_session(ses);
 623        spin_unlock(&sessionid_lock);
 624        nfsd4_put_session(ses);
 625}
 626
 627void
 628free_session(struct kref *kref)
 629{
 630        struct nfsd4_session *ses;
 631
 632        ses = container_of(kref, struct nfsd4_session, se_ref);
 633        spin_lock(&nfsd_drc_lock);
 634        nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
 635        spin_unlock(&nfsd_drc_lock);
 636        free_session_slots(ses);
 637        kfree(ses);
 638}
 639
 640static inline void
 641renew_client(struct nfs4_client *clp)
 642{
 643        /*
 644        * Move client to the end to the LRU list.
 645        */
 646        dprintk("renewing client (clientid %08x/%08x)\n", 
 647                        clp->cl_clientid.cl_boot, 
 648                        clp->cl_clientid.cl_id);
 649        list_move_tail(&clp->cl_lru, &client_lru);
 650        clp->cl_time = get_seconds();
 651}
 652
 653/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
 654static int
 655STALE_CLIENTID(clientid_t *clid)
 656{
 657        if (clid->cl_boot == boot_time)
 658                return 0;
 659        dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
 660                clid->cl_boot, clid->cl_id, boot_time);
 661        return 1;
 662}
 663
 664/* 
 665 * XXX Should we use a slab cache ?
 666 * This type of memory management is somewhat inefficient, but we use it
 667 * anyway since SETCLIENTID is not a common operation.
 668 */
 669static struct nfs4_client *alloc_client(struct xdr_netobj name)
 670{
 671        struct nfs4_client *clp;
 672
 673        clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
 674        if (clp == NULL)
 675                return NULL;
 676        clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
 677        if (clp->cl_name.data == NULL) {
 678                kfree(clp);
 679                return NULL;
 680        }
 681        memcpy(clp->cl_name.data, name.data, name.len);
 682        clp->cl_name.len = name.len;
 683        return clp;
 684}
 685
 686static void
 687shutdown_callback_client(struct nfs4_client *clp)
 688{
 689        struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
 690
 691        if (clnt) {
 692                /*
 693                 * Callback threads take a reference on the client, so there
 694                 * should be no outstanding callbacks at this point.
 695                 */
 696                clp->cl_cb_conn.cb_client = NULL;
 697                rpc_shutdown_client(clnt);
 698        }
 699}
 700
 701static inline void
 702free_client(struct nfs4_client *clp)
 703{
 704        shutdown_callback_client(clp);
 705        if (clp->cl_cb_xprt)
 706                svc_xprt_put(clp->cl_cb_xprt);
 707        if (clp->cl_cred.cr_group_info)
 708                put_group_info(clp->cl_cred.cr_group_info);
 709        kfree(clp->cl_principal);
 710        kfree(clp->cl_name.data);
 711        kfree(clp);
 712}
 713
 714void
 715put_nfs4_client(struct nfs4_client *clp)
 716{
 717        if (atomic_dec_and_test(&clp->cl_count))
 718                free_client(clp);
 719}
 720
 721static void
 722expire_client(struct nfs4_client *clp)
 723{
 724        struct nfs4_stateowner *sop;
 725        struct nfs4_delegation *dp;
 726        struct list_head reaplist;
 727
 728        dprintk("NFSD: expire_client cl_count %d\n",
 729                            atomic_read(&clp->cl_count));
 730
 731        INIT_LIST_HEAD(&reaplist);
 732        spin_lock(&recall_lock);
 733        while (!list_empty(&clp->cl_delegations)) {
 734                dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
 735                dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
 736                                dp->dl_flock);
 737                list_del_init(&dp->dl_perclnt);
 738                list_move(&dp->dl_recall_lru, &reaplist);
 739        }
 740        spin_unlock(&recall_lock);
 741        while (!list_empty(&reaplist)) {
 742                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
 743                list_del_init(&dp->dl_recall_lru);
 744                unhash_delegation(dp);
 745        }
 746        list_del(&clp->cl_idhash);
 747        list_del(&clp->cl_strhash);
 748        list_del(&clp->cl_lru);
 749        while (!list_empty(&clp->cl_openowners)) {
 750                sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 751                release_openowner(sop);
 752        }
 753        while (!list_empty(&clp->cl_sessions)) {
 754                struct nfsd4_session  *ses;
 755                ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
 756                                 se_perclnt);
 757                release_session(ses);
 758        }
 759        put_nfs4_client(clp);
 760}
 761
 762static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
 763{
 764        memcpy(target->cl_verifier.data, source->data,
 765                        sizeof(target->cl_verifier.data));
 766}
 767
 768static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
 769{
 770        target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; 
 771        target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
 772}
 773
 774static void copy_cred(struct svc_cred *target, struct svc_cred *source)
 775{
 776        target->cr_uid = source->cr_uid;
 777        target->cr_gid = source->cr_gid;
 778        target->cr_group_info = source->cr_group_info;
 779        get_group_info(target->cr_group_info);
 780}
 781
 782static int same_name(const char *n1, const char *n2)
 783{
 784        return 0 == memcmp(n1, n2, HEXDIR_LEN);
 785}
 786
 787static int
 788same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
 789{
 790        return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
 791}
 792
 793static int
 794same_clid(clientid_t *cl1, clientid_t *cl2)
 795{
 796        return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
 797}
 798
 799/* XXX what about NGROUP */
 800static int
 801same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 802{
 803        return cr1->cr_uid == cr2->cr_uid;
 804}
 805
 806static void gen_clid(struct nfs4_client *clp)
 807{
 808        static u32 current_clientid = 1;
 809
 810        clp->cl_clientid.cl_boot = boot_time;
 811        clp->cl_clientid.cl_id = current_clientid++; 
 812}
 813
 814static void gen_confirm(struct nfs4_client *clp)
 815{
 816        static u32 i;
 817        u32 *p;
 818
 819        p = (u32 *)clp->cl_confirm.data;
 820        *p++ = get_seconds();
 821        *p++ = i++;
 822}
 823
 824static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 825                struct svc_rqst *rqstp, nfs4_verifier *verf)
 826{
 827        struct nfs4_client *clp;
 828        struct sockaddr *sa = svc_addr(rqstp);
 829        char *princ;
 830
 831        clp = alloc_client(name);
 832        if (clp == NULL)
 833                return NULL;
 834
 835        princ = svc_gss_principal(rqstp);
 836        if (princ) {
 837                clp->cl_principal = kstrdup(princ, GFP_KERNEL);
 838                if (clp->cl_principal == NULL) {
 839                        free_client(clp);
 840                        return NULL;
 841                }
 842        }
 843
 844        memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 845        atomic_set(&clp->cl_count, 1);
 846        atomic_set(&clp->cl_cb_conn.cb_set, 0);
 847        INIT_LIST_HEAD(&clp->cl_idhash);
 848        INIT_LIST_HEAD(&clp->cl_strhash);
 849        INIT_LIST_HEAD(&clp->cl_openowners);
 850        INIT_LIST_HEAD(&clp->cl_delegations);
 851        INIT_LIST_HEAD(&clp->cl_sessions);
 852        INIT_LIST_HEAD(&clp->cl_lru);
 853        clear_bit(0, &clp->cl_cb_slot_busy);
 854        rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 855        copy_verf(clp, verf);
 856        rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
 857        clp->cl_flavor = rqstp->rq_flavor;
 858        copy_cred(&clp->cl_cred, &rqstp->rq_cred);
 859        gen_confirm(clp);
 860
 861        return clp;
 862}
 863
 864static int check_name(struct xdr_netobj name)
 865{
 866        if (name.len == 0) 
 867                return 0;
 868        if (name.len > NFS4_OPAQUE_LIMIT) {
 869                dprintk("NFSD: check_name: name too long(%d)!\n", name.len);
 870                return 0;
 871        }
 872        return 1;
 873}
 874
 875static void
 876add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 877{
 878        unsigned int idhashval;
 879
 880        list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
 881        idhashval = clientid_hashval(clp->cl_clientid.cl_id);
 882        list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
 883        list_add_tail(&clp->cl_lru, &client_lru);
 884        clp->cl_time = get_seconds();
 885}
 886
 887static void
 888move_to_confirmed(struct nfs4_client *clp)
 889{
 890        unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
 891        unsigned int strhashval;
 892
 893        dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
 894        list_del_init(&clp->cl_strhash);
 895        list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
 896        strhashval = clientstr_hashval(clp->cl_recdir);
 897        list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
 898        renew_client(clp);
 899}
 900
 901static struct nfs4_client *
 902find_confirmed_client(clientid_t *clid)
 903{
 904        struct nfs4_client *clp;
 905        unsigned int idhashval = clientid_hashval(clid->cl_id);
 906
 907        list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
 908                if (same_clid(&clp->cl_clientid, clid))
 909                        return clp;
 910        }
 911        return NULL;
 912}
 913
 914static struct nfs4_client *
 915find_unconfirmed_client(clientid_t *clid)
 916{
 917        struct nfs4_client *clp;
 918        unsigned int idhashval = clientid_hashval(clid->cl_id);
 919
 920        list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
 921                if (same_clid(&clp->cl_clientid, clid))
 922                        return clp;
 923        }
 924        return NULL;
 925}
 926
 927/*
 928 * Return 1 iff clp's clientid establishment method matches the use_exchange_id
 929 * parameter. Matching is based on the fact the at least one of the
 930 * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1
 931 *
 932 * FIXME: we need to unify the clientid namespaces for nfsv4.x
 933 * and correctly deal with client upgrade/downgrade in EXCHANGE_ID
 934 * and SET_CLIENTID{,_CONFIRM}
 935 */
 936static inline int
 937match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id)
 938{
 939        bool has_exchange_flags = (clp->cl_exchange_flags != 0);
 940        return use_exchange_id == has_exchange_flags;
 941}
 942
 943static struct nfs4_client *
 944find_confirmed_client_by_str(const char *dname, unsigned int hashval,
 945                             bool use_exchange_id)
 946{
 947        struct nfs4_client *clp;
 948
 949        list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
 950                if (same_name(clp->cl_recdir, dname) &&
 951                    match_clientid_establishment(clp, use_exchange_id))
 952                        return clp;
 953        }
 954        return NULL;
 955}
 956
 957static struct nfs4_client *
 958find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
 959                               bool use_exchange_id)
 960{
 961        struct nfs4_client *clp;
 962
 963        list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
 964                if (same_name(clp->cl_recdir, dname) &&
 965                    match_clientid_establishment(clp, use_exchange_id))
 966                        return clp;
 967        }
 968        return NULL;
 969}
 970
 971static void
 972gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
 973{
 974        struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 975        unsigned short expected_family;
 976
 977        /* Currently, we only support tcp and tcp6 for the callback channel */
 978        if (se->se_callback_netid_len == 3 &&
 979            !memcmp(se->se_callback_netid_val, "tcp", 3))
 980                expected_family = AF_INET;
 981        else if (se->se_callback_netid_len == 4 &&
 982                 !memcmp(se->se_callback_netid_val, "tcp6", 4))
 983                expected_family = AF_INET6;
 984        else
 985                goto out_err;
 986
 987        cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
 988                                            se->se_callback_addr_len,
 989                                            (struct sockaddr *) &cb->cb_addr,
 990                                            sizeof(cb->cb_addr));
 991
 992        if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
 993                goto out_err;
 994
 995        if (cb->cb_addr.ss_family == AF_INET6)
 996                ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid;
 997
 998        cb->cb_minorversion = 0;
 999        cb->cb_prog = se->se_callback_prog;
1000        cb->cb_ident = se->se_callback_ident;
1001        return;
1002out_err:
1003        cb->cb_addr.ss_family = AF_UNSPEC;
1004        cb->cb_addrlen = 0;
1005        dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
1006                "will not receive delegations\n",
1007                clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
1008
1009        return;
1010}
1011
1012/*
1013 * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
1014 */
1015void
1016nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1017{
1018        struct nfsd4_slot *slot = resp->cstate.slot;
1019        unsigned int base;
1020
1021        dprintk("--> %s slot %p\n", __func__, slot);
1022
1023        slot->sl_opcnt = resp->opcnt;
1024        slot->sl_status = resp->cstate.status;
1025
1026        if (nfsd4_not_cached(resp)) {
1027                slot->sl_datalen = 0;
1028                return;
1029        }
1030        slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
1031        base = (char *)resp->cstate.datap -
1032                                        (char *)resp->xbuf->head[0].iov_base;
1033        if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
1034                                    slot->sl_datalen))
1035                WARN("%s: sessions DRC could not cache compound\n", __func__);
1036        return;
1037}
1038
1039/*
1040 * Encode the replay sequence operation from the slot values.
1041 * If cachethis is FALSE encode the uncached rep error on the next
1042 * operation which sets resp->p and increments resp->opcnt for
1043 * nfs4svc_encode_compoundres.
1044 *
1045 */
1046static __be32
1047nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
1048                          struct nfsd4_compoundres *resp)
1049{
1050        struct nfsd4_op *op;
1051        struct nfsd4_slot *slot = resp->cstate.slot;
1052
1053        dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
1054                resp->opcnt, resp->cstate.slot->sl_cachethis);
1055
1056        /* Encode the replayed sequence operation */
1057        op = &args->ops[resp->opcnt - 1];
1058        nfsd4_encode_operation(resp, op);
1059
1060        /* Return nfserr_retry_uncached_rep in next operation. */
1061        if (args->opcnt > 1 && slot->sl_cachethis == 0) {
1062                op = &args->ops[resp->opcnt++];
1063                op->status = nfserr_retry_uncached_rep;
1064                nfsd4_encode_operation(resp, op);
1065        }
1066        return op->status;
1067}
1068
1069/*
1070 * The sequence operation is not cached because we can use the slot and
1071 * session values.
1072 */
1073__be32
1074nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1075                         struct nfsd4_sequence *seq)
1076{
1077        struct nfsd4_slot *slot = resp->cstate.slot;
1078        __be32 status;
1079
1080        dprintk("--> %s slot %p\n", __func__, slot);
1081
1082        /* Either returns 0 or nfserr_retry_uncached */
1083        status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
1084        if (status == nfserr_retry_uncached_rep)
1085                return status;
1086
1087        /* The sequence operation has been encoded, cstate->datap set. */
1088        memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
1089
1090        resp->opcnt = slot->sl_opcnt;
1091        resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
1092        status = slot->sl_status;
1093
1094        return status;
1095}
1096
1097/*
1098 * Set the exchange_id flags returned by the server.
1099 */
1100static void
1101nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
1102{
1103        /* pNFS is not supported */
1104        new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
1105
1106        /* Referrals are supported, Migration is not. */
1107        new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
1108
1109        /* set the wire flags to return to client. */
1110        clid->flags = new->cl_exchange_flags;
1111}
1112
1113__be32
1114nfsd4_exchange_id(struct svc_rqst *rqstp,
1115                  struct nfsd4_compound_state *cstate,
1116                  struct nfsd4_exchange_id *exid)
1117{
1118        struct nfs4_client *unconf, *conf, *new;
1119        int status;
1120        unsigned int            strhashval;
1121        char                    dname[HEXDIR_LEN];
1122        char                    addr_str[INET6_ADDRSTRLEN];
1123        nfs4_verifier           verf = exid->verifier;
1124        struct sockaddr         *sa = svc_addr(rqstp);
1125
1126        rpc_ntop(sa, addr_str, sizeof(addr_str));
1127        dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
1128                "ip_addr=%s flags %x, spa_how %d\n",
1129                __func__, rqstp, exid, exid->clname.len, exid->clname.data,
1130                addr_str, exid->flags, exid->spa_how);
1131
1132        if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
1133                return nfserr_inval;
1134
1135        /* Currently only support SP4_NONE */
1136        switch (exid->spa_how) {
1137        case SP4_NONE:
1138                break;
1139        case SP4_SSV:
1140                return nfserr_encr_alg_unsupp;
1141        default:
1142                BUG();                          /* checked by xdr code */
1143        case SP4_MACH_CRED:
1144                return nfserr_serverfault;      /* no excuse :-/ */
1145        }
1146
1147        status = nfs4_make_rec_clidname(dname, &exid->clname);
1148
1149        if (status)
1150                goto error;
1151
1152        strhashval = clientstr_hashval(dname);
1153
1154        nfs4_lock_state();
1155        status = nfs_ok;
1156
1157        conf = find_confirmed_client_by_str(dname, strhashval, true);
1158        if (conf) {
1159                if (!same_verf(&verf, &conf->cl_verifier)) {
1160                        /* 18.35.4 case 8 */
1161                        if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1162                                status = nfserr_not_same;
1163                                goto out;
1164                        }
1165                        /* Client reboot: destroy old state */
1166                        expire_client(conf);
1167                        goto out_new;
1168                }
1169                if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
1170                        /* 18.35.4 case 9 */
1171                        if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1172                                status = nfserr_perm;
1173                                goto out;
1174                        }
1175                        expire_client(conf);
1176                        goto out_new;
1177                }
1178                /*
1179                 * Set bit when the owner id and verifier map to an already
1180                 * confirmed client id (18.35.3).
1181                 */
1182                exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
1183
1184                /*
1185                 * Falling into 18.35.4 case 2, possible router replay.
1186                 * Leave confirmed record intact and return same result.
1187                 */
1188                copy_verf(conf, &verf);
1189                new = conf;
1190                goto out_copy;
1191        }
1192
1193        /* 18.35.4 case 7 */
1194        if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1195                status = nfserr_noent;
1196                goto out;
1197        }
1198
1199        unconf  = find_unconfirmed_client_by_str(dname, strhashval, true);
1200        if (unconf) {
1201                /*
1202                 * Possible retry or client restart.  Per 18.35.4 case 4,
1203                 * a new unconfirmed record should be generated regardless
1204                 * of whether any properties have changed.
1205                 */
1206                expire_client(unconf);
1207        }
1208
1209out_new:
1210        /* Normal case */
1211        new = create_client(exid->clname, dname, rqstp, &verf);
1212        if (new == NULL) {
1213                status = nfserr_serverfault;
1214                goto out;
1215        }
1216
1217        gen_clid(new);
1218        add_to_unconfirmed(new, strhashval);
1219out_copy:
1220        exid->clientid.cl_boot = new->cl_clientid.cl_boot;
1221        exid->clientid.cl_id = new->cl_clientid.cl_id;
1222
1223        exid->seqid = 1;
1224        nfsd4_set_ex_flags(new, exid);
1225
1226        dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1227                new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
1228        status = nfs_ok;
1229
1230out:
1231        nfs4_unlock_state();
1232error:
1233        dprintk("nfsd4_exchange_id returns %d\n", ntohl(status));
1234        return status;
1235}
1236
1237static int
1238check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
1239{
1240        dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
1241                slot_seqid);
1242
1243        /* The slot is in use, and no response has been sent. */
1244        if (slot_inuse) {
1245                if (seqid == slot_seqid)
1246                        return nfserr_jukebox;
1247                else
1248                        return nfserr_seq_misordered;
1249        }
1250        /* Normal */
1251        if (likely(seqid == slot_seqid + 1))
1252                return nfs_ok;
1253        /* Replay */
1254        if (seqid == slot_seqid)
1255                return nfserr_replay_cache;
1256        /* Wraparound */
1257        if (seqid == 1 && (slot_seqid + 1) == 0)
1258                return nfs_ok;
1259        /* Misordered replay or misordered new request */
1260        return nfserr_seq_misordered;
1261}
1262
1263/*
1264 * Cache the create session result into the create session single DRC
1265 * slot cache by saving the xdr structure. sl_seqid has been set.
1266 * Do this for solo or embedded create session operations.
1267 */
1268static void
1269nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
1270                           struct nfsd4_clid_slot *slot, int nfserr)
1271{
1272        slot->sl_status = nfserr;
1273        memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
1274}
1275
1276static __be32
1277nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
1278                            struct nfsd4_clid_slot *slot)
1279{
1280        memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
1281        return slot->sl_status;
1282}
1283
1284__be32
1285nfsd4_create_session(struct svc_rqst *rqstp,
1286                     struct nfsd4_compound_state *cstate,
1287                     struct nfsd4_create_session *cr_ses)
1288{
1289        struct sockaddr *sa = svc_addr(rqstp);
1290        struct nfs4_client *conf, *unconf;
1291        struct nfsd4_clid_slot *cs_slot = NULL;
1292        int status = 0;
1293
1294        nfs4_lock_state();
1295        unconf = find_unconfirmed_client(&cr_ses->clientid);
1296        conf = find_confirmed_client(&cr_ses->clientid);
1297
1298        if (conf) {
1299                cs_slot = &conf->cl_cs_slot;
1300                status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1301                if (status == nfserr_replay_cache) {
1302                        dprintk("Got a create_session replay! seqid= %d\n",
1303                                cs_slot->sl_seqid);
1304                        /* Return the cached reply status */
1305                        status = nfsd4_replay_create_session(cr_ses, cs_slot);
1306                        goto out;
1307                } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
1308                        status = nfserr_seq_misordered;
1309                        dprintk("Sequence misordered!\n");
1310                        dprintk("Expected seqid= %d but got seqid= %d\n",
1311                                cs_slot->sl_seqid, cr_ses->seqid);
1312                        goto out;
1313                }
1314                cs_slot->sl_seqid++;
1315        } else if (unconf) {
1316                if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1317                    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
1318                        status = nfserr_clid_inuse;
1319                        goto out;
1320                }
1321
1322                cs_slot = &unconf->cl_cs_slot;
1323                status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1324                if (status) {
1325                        /* an unconfirmed replay returns misordered */
1326                        status = nfserr_seq_misordered;
1327                        goto out_cache;
1328                }
1329
1330                cs_slot->sl_seqid++; /* from 0 to 1 */
1331                move_to_confirmed(unconf);
1332
1333                /*
1334                 * We do not support RDMA or persistent sessions
1335                 */
1336                cr_ses->flags &= ~SESSION4_PERSIST;
1337                cr_ses->flags &= ~SESSION4_RDMA;
1338
1339                if (cr_ses->flags & SESSION4_BACK_CHAN) {
1340                        unconf->cl_cb_xprt = rqstp->rq_xprt;
1341                        svc_xprt_get(unconf->cl_cb_xprt);
1342                        rpc_copy_addr(
1343                                (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1344                                sa);
1345                        unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1346                        unconf->cl_cb_conn.cb_minorversion =
1347                                cstate->minorversion;
1348                        unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1349                        unconf->cl_cb_seq_nr = 1;
1350                        nfsd4_probe_callback(unconf);
1351                }
1352                conf = unconf;
1353        } else {
1354                status = nfserr_stale_clientid;
1355                goto out;
1356        }
1357
1358        status = alloc_init_session(rqstp, conf, cr_ses);
1359        if (status)
1360                goto out;
1361
1362        memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
1363               NFS4_MAX_SESSIONID_LEN);
1364        cr_ses->seqid = cs_slot->sl_seqid;
1365
1366out_cache:
1367        /* cache solo and embedded create sessions under the state lock */
1368        nfsd4_cache_create_session(cr_ses, cs_slot, status);
1369out:
1370        nfs4_unlock_state();
1371        dprintk("%s returns %d\n", __func__, ntohl(status));
1372        return status;
1373}
1374
1375__be32
1376nfsd4_destroy_session(struct svc_rqst *r,
1377                      struct nfsd4_compound_state *cstate,
1378                      struct nfsd4_destroy_session *sessionid)
1379{
1380        struct nfsd4_session *ses;
1381        u32 status = nfserr_badsession;
1382
1383        /* Notes:
1384         * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid
1385         * - Should we return nfserr_back_chan_busy if waiting for
1386         *   callbacks on to-be-destroyed session?
1387         * - Do we need to clear any callback info from previous session?
1388         */
1389
1390        dump_sessionid(__func__, &sessionid->sessionid);
1391        spin_lock(&sessionid_lock);
1392        ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
1393        if (!ses) {
1394                spin_unlock(&sessionid_lock);
1395                goto out;
1396        }
1397
1398        unhash_session(ses);
1399        spin_unlock(&sessionid_lock);
1400
1401        /* wait for callbacks */
1402        shutdown_callback_client(ses->se_client);
1403        nfsd4_put_session(ses);
1404        status = nfs_ok;
1405out:
1406        dprintk("%s returns %d\n", __func__, ntohl(status));
1407        return status;
1408}
1409
1410__be32
1411nfsd4_sequence(struct svc_rqst *rqstp,
1412               struct nfsd4_compound_state *cstate,
1413               struct nfsd4_sequence *seq)
1414{
1415        struct nfsd4_compoundres *resp = rqstp->rq_resp;
1416        struct nfsd4_session *session;
1417        struct nfsd4_slot *slot;
1418        int status;
1419
1420        if (resp->opcnt != 1)
1421                return nfserr_sequence_pos;
1422
1423        spin_lock(&sessionid_lock);
1424        status = nfserr_badsession;
1425        session = find_in_sessionid_hashtbl(&seq->sessionid);
1426        if (!session)
1427                goto out;
1428
1429        status = nfserr_badslot;
1430        if (seq->slotid >= session->se_fchannel.maxreqs)
1431                goto out;
1432
1433        slot = session->se_slots[seq->slotid];
1434        dprintk("%s: slotid %d\n", __func__, seq->slotid);
1435
1436        /* We do not negotiate the number of slots yet, so set the
1437         * maxslots to the session maxreqs which is used to encode
1438         * sr_highest_slotid and the sr_target_slot id to maxslots */
1439        seq->maxslots = session->se_fchannel.maxreqs;
1440
1441        status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
1442        if (status == nfserr_replay_cache) {
1443                cstate->slot = slot;
1444                cstate->session = session;
1445                /* Return the cached reply status and set cstate->status
1446                 * for nfsd4_proc_compound processing */
1447                status = nfsd4_replay_cache_entry(resp, seq);
1448                cstate->status = nfserr_replay_cache;
1449                goto out;
1450        }
1451        if (status)
1452                goto out;
1453
1454        /* Success! bump slot seqid */
1455        slot->sl_inuse = true;
1456        slot->sl_seqid = seq->seqid;
1457        slot->sl_cachethis = seq->cachethis;
1458
1459        cstate->slot = slot;
1460        cstate->session = session;
1461
1462        /* Hold a session reference until done processing the compound:
1463         * nfsd4_put_session called only if the cstate slot is set.
1464         */
1465        nfsd4_get_session(session);
1466out:
1467        spin_unlock(&sessionid_lock);
1468        /* Renew the clientid on success and on replay */
1469        if (cstate->session) {
1470                nfs4_lock_state();
1471                renew_client(session->se_client);
1472                nfs4_unlock_state();
1473        }
1474        dprintk("%s: return %d\n", __func__, ntohl(status));
1475        return status;
1476}
1477
/*
 * Process a SETCLIENTID operation.
 *
 * Validates the client name, derives the recovery-directory name from it,
 * and then, under the state lock, creates a new unconfirmed client record
 * according to the cases described in RFC 3530 section 14.2.33.  Any
 * existing unconfirmed record with the same name is expired first in the
 * cases that find one.  On success the new clientid and confirm verifier
 * are returned in @setclid.
 *
 * Returns nfs_ok on success, or:
 *   nfserr_inval       - check_name() rejected the client name
 *   nfserr_clid_inuse  - a confirmed client with this name exists under
 *                        different credentials
 *   nfserr_resource    - create_client() failed
 * or whatever nfs4_make_rec_clidname() reports.
 */
__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                  struct nfsd4_setclientid *setclid)
{
        struct sockaddr         *sa = svc_addr(rqstp);
        struct xdr_netobj       clname = { 
                .len = setclid->se_namelen,
                .data = setclid->se_name,
        };
        nfs4_verifier           clverifier = setclid->se_verf;
        unsigned int            strhashval;
        struct nfs4_client      *conf, *unconf, *new;
        __be32                  status;
        char                    dname[HEXDIR_LEN];
        
        if (!check_name(clname))
                return nfserr_inval;

        status = nfs4_make_rec_clidname(dname, &clname);
        if (status)
                return status;

        /* 
         * XXX The Duplicate Request Cache (DRC) has been checked (??)
         * We get here on a DRC miss.
         */

        strhashval = clientstr_hashval(dname);

        nfs4_lock_state();
        /* "false": only consider clients not established via EXCHANGE_ID. */
        conf = find_confirmed_client_by_str(dname, strhashval, false);
        if (conf) {
                /* RFC 3530 14.2.33 CASE 0: */
                status = nfserr_clid_inuse;
                if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
                        char addr_str[INET6_ADDRSTRLEN];
                        rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
                                 sizeof(addr_str));
                        dprintk("NFSD: setclientid: string in use by client "
                                "at %s\n", addr_str);
                        goto out;
                }
        }
        /*
         * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
         * has a description of SETCLIENTID request processing consisting
         * of 5 bullet points, labeled as CASE0 - CASE4 below.
         */
        unconf = find_unconfirmed_client_by_str(dname, strhashval, false);
        /* Status returned if any create_client() call below fails. */
        status = nfserr_resource;
        if (!conf) {
                /*
                 * RFC 3530 14.2.33 CASE 4:
                 * placed first, because it is the normal case
                 */
                if (unconf)
                        expire_client(unconf);
                new = create_client(clname, dname, rqstp, &clverifier);
                if (new == NULL)
                        goto out;
                gen_clid(new);
        } else if (same_verf(&conf->cl_verifier, &clverifier)) {
                /*
                 * RFC 3530 14.2.33 CASE 1:
                 * probable callback update
                 */
                if (unconf) {
                        /* Note this is removing unconfirmed {*x***},
                         * which is stronger than RFC recommended {vxc**}.
                         * This has the advantage that there is at most
                         * one {*x***} in either list at any time.
                         */
                        expire_client(unconf);
                }
                new = create_client(clname, dname, rqstp, &clverifier);
                if (new == NULL)
                        goto out;
                /* Reuse the confirmed client's clientid for the new record. */
                copy_clid(new, conf);
        } else if (!unconf) {
                /*
                 * RFC 3530 14.2.33 CASE 2:
                 * probable client reboot; state will be removed if
                 * confirmed.
                 */
                new = create_client(clname, dname, rqstp, &clverifier);
                if (new == NULL)
                        goto out;
                gen_clid(new);
        } else {
                /*
                 * RFC 3530 14.2.33 CASE 3:
                 * probable client reboot; state will be removed if
                 * confirmed.
                 */
                expire_client(unconf);
                new = create_client(clname, dname, rqstp, &clverifier);
                if (new == NULL)
                        goto out;
                gen_clid(new);
        }
        /* Common tail: record callback info and publish the new record. */
        gen_callback(new, setclid, rpc_get_scope_id(sa));
        add_to_unconfirmed(new, strhashval);
        setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
        setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
        memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
        status = nfs_ok;
out:
        nfs4_unlock_state();
        return status;
}
1588
1589
/*
 * Process a SETCLIENTID_CONFIRM operation.
 *
 * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
 * a description of SETCLIENTID_CONFIRM request processing consisting of 4
 * bullets, labeled as CASE1 - CASE4 below.
 *
 * The clientid is looked up in both the confirmed and unconfirmed tables
 * under the state lock.  A record that was created from a different network
 * address than the one this request arrived from yields nfserr_clid_inuse.
 *
 * Returns nfs_ok, nfserr_clid_inuse or nfserr_stale_clientid.
 */
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                         struct nfsd4_compound_state *cstate,
                         struct nfsd4_setclientid_confirm *setclientid_confirm)
{
        struct sockaddr *sa = svc_addr(rqstp);
        struct nfs4_client *conf, *unconf;
        nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
        clientid_t * clid = &setclientid_confirm->sc_clientid;
        __be32 status;

        if (STALE_CLIENTID(clid))
                return nfserr_stale_clientid;
        /* 
         * XXX The Duplicate Request Cache (DRC) has been checked (??)
         * We get here on a DRC miss.
         */

        nfs4_lock_state();

        conf = find_confirmed_client(clid);
        unconf = find_unconfirmed_client(clid);

        /* The request must come from the address that set the clientid. */
        status = nfserr_clid_inuse;
        if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa))
                goto out;
        if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa))
                goto out;

        /*
         * section 14.2.34 of RFC 3530 has a description of
         * SETCLIENTID_CONFIRM request processing consisting
         * of 4 bullet points, labeled as CASE1 - CASE4 below.
         */
        if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
                /*
                 * RFC 3530 14.2.34 CASE 1:
                 * callback update
                 */
                if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
                        status = nfserr_clid_inuse;
                else {
                        /* XXX: We just turn off callbacks until we can handle
                          * change request correctly. */
                        atomic_set(&conf->cl_cb_conn.cb_set, 0);
                        expire_client(unconf);
                        status = nfs_ok;

                }
        } else if (conf && !unconf) {
                /*
                 * RFC 3530 14.2.34 CASE 2:
                 * probable retransmitted request; play it safe and
                 * do nothing.
                 */
                if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
                        status = nfserr_clid_inuse;
                else
                        status = nfs_ok;
        } else if (!conf && unconf
                        && same_verf(&unconf->cl_confirm, &confirm)) {
                /*
                 * RFC 3530 14.2.34 CASE 3:
                 * Normal case; new or rebooted client:
                 */
                if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
                        status = nfserr_clid_inuse;
                } else {
                        unsigned int hash =
                                clientstr_hashval(unconf->cl_recdir);
                        /*
                         * Expire any confirmed client with the same name
                         * before confirming this one.
                         */
                        conf = find_confirmed_client_by_str(unconf->cl_recdir,
                                                            hash, false);
                        if (conf) {
                                nfsd4_remove_clid_dir(conf);
                                expire_client(conf);
                        }
                        move_to_confirmed(unconf);
                        conf = unconf;
                        nfsd4_probe_callback(conf);
                        status = nfs_ok;
                }
        } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
            && (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
                                                                &confirm)))) {
                /*
                 * RFC 3530 14.2.34 CASE 4:
                 * Client probably hasn't noticed that we rebooted yet.
                 */
                status = nfserr_stale_clientid;
        } else {
                /* check that we have hit one of the cases...*/
                status = nfserr_clid_inuse;
        }
out:
        nfs4_unlock_state();
        return status;
}
1692
1693/* OPEN Share state helper functions */
1694static inline struct nfs4_file *
1695alloc_init_file(struct inode *ino)
1696{
1697        struct nfs4_file *fp;
1698        unsigned int hashval = file_hashval(ino);
1699
1700        fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
1701        if (fp) {
1702                atomic_set(&fp->fi_ref, 1);
1703                INIT_LIST_HEAD(&fp->fi_hash);
1704                INIT_LIST_HEAD(&fp->fi_stateids);
1705                INIT_LIST_HEAD(&fp->fi_delegations);
1706                spin_lock(&recall_lock);
1707                list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1708                spin_unlock(&recall_lock);
1709                fp->fi_inode = igrab(ino);
1710                fp->fi_id = current_fileid++;
1711                fp->fi_had_conflict = false;
1712                return fp;
1713        }
1714        return NULL;
1715}
1716
1717static void
1718nfsd4_free_slab(struct kmem_cache **slab)
1719{
1720        if (*slab == NULL)
1721                return;
1722        kmem_cache_destroy(*slab);
1723        *slab = NULL;
1724}
1725
1726void
1727nfsd4_free_slabs(void)
1728{
1729        nfsd4_free_slab(&stateowner_slab);
1730        nfsd4_free_slab(&file_slab);
1731        nfsd4_free_slab(&stateid_slab);
1732        nfsd4_free_slab(&deleg_slab);
1733}
1734
1735static int
1736nfsd4_init_slabs(void)
1737{
1738        stateowner_slab = kmem_cache_create("nfsd4_stateowners",
1739                        sizeof(struct nfs4_stateowner), 0, 0, NULL);
1740        if (stateowner_slab == NULL)
1741                goto out_nomem;
1742        file_slab = kmem_cache_create("nfsd4_files",
1743                        sizeof(struct nfs4_file), 0, 0, NULL);
1744        if (file_slab == NULL)
1745                goto out_nomem;
1746        stateid_slab = kmem_cache_create("nfsd4_stateids",
1747                        sizeof(struct nfs4_stateid), 0, 0, NULL);
1748        if (stateid_slab == NULL)
1749                goto out_nomem;
1750        deleg_slab = kmem_cache_create("nfsd4_delegations",
1751                        sizeof(struct nfs4_delegation), 0, 0, NULL);
1752        if (deleg_slab == NULL)
1753                goto out_nomem;
1754        return 0;
1755out_nomem:
1756        nfsd4_free_slabs();
1757        dprintk("nfsd4: out of memory while initializing nfsv4\n");
1758        return -ENOMEM;
1759}
1760
1761void
1762nfs4_free_stateowner(struct kref *kref)
1763{
1764        struct nfs4_stateowner *sop =
1765                container_of(kref, struct nfs4_stateowner, so_ref);
1766        kfree(sop->so_owner.data);
1767        kmem_cache_free(stateowner_slab, sop);
1768}
1769
1770static inline struct nfs4_stateowner *
1771alloc_stateowner(struct xdr_netobj *owner)
1772{
1773        struct nfs4_stateowner *sop;
1774
1775        if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
1776                if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
1777                        memcpy(sop->so_owner.data, owner->data, owner->len);
1778                        sop->so_owner.len = owner->len;
1779                        kref_init(&sop->so_ref);
1780                        return sop;
1781                } 
1782                kmem_cache_free(stateowner_slab, sop);
1783        }
1784        return NULL;
1785}
1786
1787static struct nfs4_stateowner *
1788alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
1789        struct nfs4_stateowner *sop;
1790        struct nfs4_replay *rp;
1791        unsigned int idhashval;
1792
1793        if (!(sop = alloc_stateowner(&open->op_owner)))
1794                return NULL;
1795        idhashval = ownerid_hashval(current_ownerid);
1796        INIT_LIST_HEAD(&sop->so_idhash);
1797        INIT_LIST_HEAD(&sop->so_strhash);
1798        INIT_LIST_HEAD(&sop->so_perclient);
1799        INIT_LIST_HEAD(&sop->so_stateids);
1800        INIT_LIST_HEAD(&sop->so_perstateid);  /* not used */
1801        INIT_LIST_HEAD(&sop->so_close_lru);
1802        sop->so_time = 0;
1803        list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
1804        list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
1805        list_add(&sop->so_perclient, &clp->cl_openowners);
1806        sop->so_is_open_owner = 1;
1807        sop->so_id = current_ownerid++;
1808        sop->so_client = clp;
1809        sop->so_seqid = open->op_seqid;
1810        sop->so_confirmed = 0;
1811        rp = &sop->so_replay;
1812        rp->rp_status = nfserr_serverfault;
1813        rp->rp_buflen = 0;
1814        rp->rp_buf = rp->rp_ibuf;
1815        return sop;
1816}
1817
1818static inline void
1819init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
1820        struct nfs4_stateowner *sop = open->op_stateowner;
1821        unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
1822
1823        INIT_LIST_HEAD(&stp->st_hash);
1824        INIT_LIST_HEAD(&stp->st_perstateowner);
1825        INIT_LIST_HEAD(&stp->st_lockowners);
1826        INIT_LIST_HEAD(&stp->st_perfile);
1827        list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
1828        list_add(&stp->st_perstateowner, &sop->so_stateids);
1829        list_add(&stp->st_perfile, &fp->fi_stateids);
1830        stp->st_stateowner = sop;
1831        get_nfs4_file(fp);
1832        stp->st_file = fp;
1833        stp->st_stateid.si_boot = get_seconds();
1834        stp->st_stateid.si_stateownerid = sop->so_id;
1835        stp->st_stateid.si_fileid = fp->fi_id;
1836        stp->st_stateid.si_generation = 0;
1837        stp->st_access_bmap = 0;
1838        stp->st_deny_bmap = 0;
1839        __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK,
1840                  &stp->st_access_bmap);
1841        __set_bit(open->op_share_deny, &stp->st_deny_bmap);
1842        stp->st_openstp = NULL;
1843}
1844
1845static void
1846move_to_close_lru(struct nfs4_stateowner *sop)
1847{
1848        dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
1849
1850        list_move_tail(&sop->so_close_lru, &close_lru);
1851        sop->so_time = get_seconds();
1852}
1853
1854static int
1855same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
1856                                                        clientid_t *clid)
1857{
1858        return (sop->so_owner.len == owner->len) &&
1859                0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
1860                (sop->so_client->cl_clientid.cl_id == clid->cl_id);
1861}
1862
1863static struct nfs4_stateowner *
1864find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
1865{
1866        struct nfs4_stateowner *so = NULL;
1867
1868        list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
1869                if (same_owner_str(so, &open->op_owner, &open->op_clientid))
1870                        return so;
1871        }
1872        return NULL;
1873}
1874
1875/* search file_hashtbl[] for file */
1876static struct nfs4_file *
1877find_file(struct inode *ino)
1878{
1879        unsigned int hashval = file_hashval(ino);
1880        struct nfs4_file *fp;
1881
1882        spin_lock(&recall_lock);
1883        list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
1884                if (fp->fi_inode == ino) {
1885                        get_nfs4_file(fp);
1886                        spin_unlock(&recall_lock);
1887                        return fp;
1888                }
1889        }
1890        spin_unlock(&recall_lock);
1891        return NULL;
1892}
1893
1894static inline int access_valid(u32 x, u32 minorversion)
1895{
1896        if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
1897                return 0;
1898        if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH)
1899                return 0;
1900        x &= ~NFS4_SHARE_ACCESS_MASK;
1901        if (minorversion && x) {
1902                if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL)
1903                        return 0;
1904                if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED)
1905                        return 0;
1906                x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK);
1907        }
1908        if (x)
1909                return 0;
1910        return 1;
1911}
1912
1913static inline int deny_valid(u32 x)
1914{
1915        /* Note: unlike access bits, deny bits may be zero. */
1916        return x <= NFS4_SHARE_DENY_BOTH;
1917}
1918
/*
 * We store the NONE, READ, WRITE, and BOTH bits separately in the
 * st_{access,deny}_bmap field of the stateid, in order to track not
 * only what share bits are currently in force, but also what
 * combinations of share bits previous opens have used.  This allows us
 * to enforce the recommendation of rfc 3530 14.2.19 that the server
 * return an error if the client attempts to downgrade to a combination
 * of share bits not explicable by closing some of its previous opens.
 *
 * XXX: This enforcement is actually incomplete, since we don't keep
 * track of access/deny bit combinations; so, e.g., we allow:
 *
 *      OPEN allow read, deny write
 *      OPEN allow both, deny none
 *      DOWNGRADE allow read, deny none
 *
 * which we should reject.
 */

/*
 * Collapse the access bitmap @bmap into a single share-access value:
 * *access becomes the OR of every index in 1..3 whose bit is set.
 */
static void
set_access(unsigned int *access, unsigned long bmap) {
	unsigned int combined = 0;
	int mode;

	for (mode = 1; mode < 4; mode++) {
		if (test_bit(mode, &bmap))
			combined |= mode;
	}
	*access = combined;
}
1947
/*
 * Collapse the deny bitmap @bmap into a single share-deny value:
 * *deny becomes the OR of every index in 0..3 whose bit is set
 * (index 0 contributes nothing to the result).
 */
static void
set_deny(unsigned int *deny, unsigned long bmap) {
	unsigned int combined = 0;
	int mode;

	for (mode = 0; mode < 4; mode++) {
		if (test_bit(mode, &bmap))
			combined |= mode;
	}
	*deny = combined;
}
1958
1959static int
1960test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
1961        unsigned int access, deny;
1962
1963        set_access(&access, stp->st_access_bmap);
1964        set_deny(&deny, stp->st_deny_bmap);
1965        if ((access & open->op_share_deny) || (deny & open->op_share_access))
1966                return 0;
1967        return 1;
1968}
1969
1970/*
1971 * Called to check deny when READ with all zero stateid or
1972 * WRITE with all zero or all one stateid
1973 */
1974static __be32
1975nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
1976{
1977        struct inode *ino = current_fh->fh_dentry->d_inode;
1978        struct nfs4_file *fp;
1979        struct nfs4_stateid *stp;
1980        __be32 ret;
1981
1982        dprintk("NFSD: nfs4_share_conflict\n");
1983
1984        fp = find_file(ino);
1985        if (!fp)
1986                return nfs_ok;
1987        ret = nfserr_locked;
1988        /* Search for conflicting share reservations */
1989        list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
1990                if (test_bit(deny_type, &stp->st_deny_bmap) ||
1991                    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
1992                        goto out;
1993        }
1994        ret = nfs_ok;
1995out:
1996        put_nfs4_file(fp);
1997        return ret;
1998}
1999
2000static inline void
2001nfs4_file_downgrade(struct file *filp, unsigned int share_access)
2002{
2003        if (share_access & NFS4_SHARE_ACCESS_WRITE) {
2004                drop_file_write_access(filp);
2005                filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
2006        }
2007}
2008
2009/*
2010 * Spawn a thread to perform a recall on the delegation represented
2011 * by the lease (file_lock)
2012 *
2013 * Called from break_lease() with lock_kernel() held.
2014 * Note: we assume break_lease will only call this *once* for any given
2015 * lease.
2016 */
2017static
2018void nfsd_break_deleg_cb(struct file_lock *fl)
2019{
2020        struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2021
2022        dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2023        if (!dp)
2024                return;
2025
2026        /* We're assuming the state code never drops its reference
2027         * without first removing the lease.  Since we're in this lease
2028         * callback (and since the lease code is serialized by the kernel
2029         * lock) we know the server hasn't removed the lease yet, we know
2030         * it's safe to take a reference: */
2031        atomic_inc(&dp->dl_count);
2032        atomic_inc(&dp->dl_client->cl_count);
2033
2034        spin_lock(&recall_lock);
2035        list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2036        spin_unlock(&recall_lock);
2037
2038        /* only place dl_time is set. protected by lock_kernel*/
2039        dp->dl_time = get_seconds();
2040
2041        /*
2042         * We don't want the locks code to timeout the lease for us;
2043         * we'll remove it ourself if the delegation isn't returned
2044         * in time.
2045         */
2046        fl->fl_break_time = 0;
2047
2048        dp->dl_file->fi_had_conflict = true;
2049        nfsd4_cb_recall(dp);
2050}
2051
2052/*
2053 * The file_lock is being reapd.
2054 *
2055 * Called by locks_free_lock() with lock_kernel() held.
2056 */
2057static
2058void nfsd_release_deleg_cb(struct file_lock *fl)
2059{
2060        struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2061
2062        dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count));
2063
2064        if (!(fl->fl_flags & FL_LEASE) || !dp)
2065                return;
2066        dp->dl_flock = NULL;
2067}
2068
2069/*
2070 * Set the delegation file_lock back pointer.
2071 *
2072 * Called from setlease() with lock_kernel() held.
2073 */
2074static
2075void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
2076{
2077        struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
2078
2079        dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
2080        if (!dp)
2081                return;
2082        dp->dl_flock = new;
2083}
2084
2085/*
2086 * Called from setlease() with lock_kernel() held
2087 */
2088static
2089int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
2090{
2091        struct nfs4_delegation *onlistd =
2092                (struct nfs4_delegation *)onlist->fl_owner;
2093        struct nfs4_delegation *tryd =
2094                (struct nfs4_delegation *)try->fl_owner;
2095
2096        if (onlist->fl_lmops != try->fl_lmops)
2097                return 0;
2098
2099        return onlistd->dl_client == tryd->dl_client;
2100}
2101
2102
2103static
2104int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
2105{
2106        if (arg & F_UNLCK)
2107                return lease_modify(onlist, arg);
2108        else
2109                return -EAGAIN;
2110}
2111
2112static const struct lock_manager_operations nfsd_lease_mng_ops = {
2113        .fl_break = nfsd_break_deleg_cb,
2114        .fl_release_private = nfsd_release_deleg_cb,
2115        .fl_copy_lock = nfsd_copy_lock_deleg_cb,
2116        .fl_mylease = nfsd_same_client_deleg_cb,
2117        .fl_change = nfsd_change_deleg_cb,
2118};
2119
2120
2121__be32
2122nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2123                    struct nfsd4_open *open)
2124{
2125        clientid_t *clientid = &open->op_clientid;
2126        struct nfs4_client *clp = NULL;
2127        unsigned int strhashval;
2128        struct nfs4_stateowner *sop = NULL;
2129
2130        if (!check_name(open->op_owner))
2131                return nfserr_inval;
2132
2133        if (STALE_CLIENTID(&open->op_clientid))
2134                return nfserr_stale_clientid;
2135
2136        strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
2137        sop = find_openstateowner_str(strhashval, open);
2138        open->op_stateowner = sop;
2139        if (!sop) {
2140                /* Make sure the client's lease hasn't expired. */
2141                clp = find_confirmed_client(clientid);
2142                if (clp == NULL)
2143                        return nfserr_expired;
2144                goto renew;
2145        }
2146        /* When sessions are used, skip open sequenceid processing */
2147        if (nfsd4_has_session(cstate))
2148                goto renew;
2149        if (!sop->so_confirmed) {
2150                /* Replace unconfirmed owners without checking for replay. */
2151                clp = sop->so_client;
2152                release_openowner(sop);
2153                open->op_stateowner = NULL;
2154                goto renew;
2155        }
2156        if (open->op_seqid == sop->so_seqid - 1) {
2157                if (sop->so_replay.rp_buflen)
2158                        return nfserr_replay_me;
2159                /* The original OPEN failed so spectacularly
2160                 * that we don't even have replay data saved!
2161                 * Therefore, we have no choice but to continue
2162                 * processing this OPEN; presumably, we'll
2163                 * fail again for the same reason.
2164                 */
2165                dprintk("nfsd4_process_open1: replay with no replay cache\n");
2166                goto renew;
2167        }
2168        if (open->op_seqid != sop->so_seqid)
2169                return nfserr_bad_seqid;
2170renew:
2171        if (open->op_stateowner == NULL) {
2172                sop = alloc_init_open_stateowner(strhashval, clp, open);
2173                if (sop == NULL)
2174                        return nfserr_resource;
2175                open->op_stateowner = sop;
2176        }
2177        list_del_init(&sop->so_close_lru);
2178        renew_client(sop->so_client);
2179        return nfs_ok;
2180}
2181
2182static inline __be32
2183nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2184{
2185        if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
2186                return nfserr_openmode;
2187        else
2188                return nfs_ok;
2189}
2190
2191static struct nfs4_delegation *
2192find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2193{
2194        struct nfs4_delegation *dp;
2195
2196        list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
2197                if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
2198                        return dp;
2199        }
2200        return NULL;
2201}
2202
2203static __be32
2204nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
2205                struct nfs4_delegation **dp)
2206{
2207        int flags;
2208        __be32 status = nfserr_bad_stateid;
2209
2210        *dp = find_delegation_file(fp, &open->op_delegate_stateid);
2211        if (*dp == NULL)
2212                goto out;
2213        flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
2214                                                RD_STATE : WR_STATE;
2215        status = nfs4_check_delegmode(*dp, flags);
2216        if (status)
2217                *dp = NULL;
2218out:
2219        if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
2220                return nfs_ok;
2221        if (status)
2222                return status;
2223        open->op_stateowner->so_confirmed = 1;
2224        return nfs_ok;
2225}
2226
2227static __be32
2228nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
2229{
2230        struct nfs4_stateid *local;
2231        __be32 status = nfserr_share_denied;
2232        struct nfs4_stateowner *sop = open->op_stateowner;
2233
2234        list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
2235                /* ignore lock owners */
2236                if (local->st_stateowner->so_is_open_owner == 0)
2237                        continue;
2238                /* remember if we have seen this open owner */
2239                if (local->st_stateowner == sop)
2240                        *stpp = local;
2241                /* check for conflicting share reservations */
2242                if (!test_share(local, open))
2243                        goto out;
2244        }
2245        status = 0;
2246out:
2247        return status;
2248}
2249
2250static inline struct nfs4_stateid *
2251nfs4_alloc_stateid(void)
2252{
2253        return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
2254}
2255
2256static __be32
2257nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
2258                struct nfs4_delegation *dp,
2259                struct svc_fh *cur_fh, int flags)
2260{
2261        struct nfs4_stateid *stp;
2262
2263        stp = nfs4_alloc_stateid();
2264        if (stp == NULL)
2265                return nfserr_resource;
2266
2267        if (dp) {
2268                get_file(dp->dl_vfs_file);
2269                stp->st_vfs_file = dp->dl_vfs_file;
2270        } else {
2271                __be32 status;
2272                status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
2273                                &stp->st_vfs_file);
2274                if (status) {
2275                        if (status == nfserr_dropit)
2276                                status = nfserr_jukebox;
2277                        kmem_cache_free(stateid_slab, stp);
2278                        return status;
2279                }
2280        }
2281        *stpp = stp;
2282        return 0;
2283}
2284
2285static inline __be32
2286nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
2287                struct nfsd4_open *open)
2288{
2289        struct iattr iattr = {
2290                .ia_valid = ATTR_SIZE,
2291                .ia_size = 0,
2292        };
2293        if (!open->op_truncate)
2294                return 0;
2295        if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
2296                return nfserr_inval;
2297        return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
2298}
2299
2300static __be32
2301nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
2302{
2303        struct file *filp = stp->st_vfs_file;
2304        struct inode *inode = filp->f_path.dentry->d_inode;
2305        unsigned int share_access, new_writer;
2306        __be32 status;
2307
2308        set_access(&share_access, stp->st_access_bmap);
2309        new_writer = (~share_access) & open->op_share_access
2310                        & NFS4_SHARE_ACCESS_WRITE;
2311
2312        if (new_writer) {
2313                int err = get_write_access(inode);
2314                if (err)
2315                        return nfserrno(err);
2316                err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
2317                if (err)
2318                        return nfserrno(err);
2319                file_take_write(filp);
2320        }
2321        status = nfsd4_truncate(rqstp, cur_fh, open);
2322        if (status) {
2323                if (new_writer)
2324                        put_write_access(inode);
2325                return status;
2326        }
2327        /* remember the open */
2328        filp->f_mode |= open->op_share_access;
2329        __set_bit(open->op_share_access, &stp->st_access_bmap);
2330        __set_bit(open->op_share_deny, &stp->st_deny_bmap);
2331
2332        return nfs_ok;
2333}
2334
2335
2336static void
2337nfs4_set_claim_prev(struct nfsd4_open *open)
2338{
2339        open->op_stateowner->so_confirmed = 1;
2340        open->op_stateowner->so_client->cl_firststate = 1;
2341}
2342
2343/*
2344 * Attempt to hand out a delegation.
2345 */
2346static void
2347nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp)
2348{
2349        struct nfs4_delegation *dp;
2350        struct nfs4_stateowner *sop = stp->st_stateowner;
2351        struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
2352        struct file_lock fl, *flp = &fl;
2353        int status, flag = 0;
2354
2355        flag = NFS4_OPEN_DELEGATE_NONE;
2356        open->op_recall = 0;
2357        switch (open->op_claim_type) {
2358                case NFS4_OPEN_CLAIM_PREVIOUS:
2359                        if (!atomic_read(&cb->cb_set))
2360                                open->op_recall = 1;
2361                        flag = open->op_delegate_type;
2362                        if (flag == NFS4_OPEN_DELEGATE_NONE)
2363                                goto out;
2364                        break;
2365                case NFS4_OPEN_CLAIM_NULL:
2366                        /* Let's not give out any delegations till everyone's
2367                         * had the chance to reclaim theirs.... */
2368                        if (locks_in_grace())
2369                                goto out;
2370                        if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
2371                                goto out;
2372                        if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2373                                flag = NFS4_OPEN_DELEGATE_WRITE;
2374                        else
2375                                flag = NFS4_OPEN_DELEGATE_READ;
2376                        break;
2377                default:
2378                        goto out;
2379        }
2380
2381        dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
2382        if (dp == NULL) {
2383                flag = NFS4_OPEN_DELEGATE_NONE;
2384                goto out;
2385        }
2386        locks_init_lock(&fl);
2387        fl.fl_lmops = &nfsd_lease_mng_ops;
2388        fl.fl_flags = FL_LEASE;
2389        fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2390        fl.fl_end = OFFSET_MAX;
2391        fl.fl_owner =  (fl_owner_t)dp;
2392        fl.fl_file = stp->st_vfs_file;
2393        fl.fl_pid = current->tgid;
2394
2395        /* vfs_setlease checks to see if delegation should be handed out.
2396         * the lock_manager callbacks fl_mylease and fl_change are used
2397         */
2398        if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) {
2399                dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2400                unhash_delegation(dp);
2401                flag = NFS4_OPEN_DELEGATE_NONE;
2402                goto out;
2403        }
2404
2405        memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2406
2407        dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
2408                     dp->dl_stateid.si_boot,
2409                     dp->dl_stateid.si_stateownerid,
2410                     dp->dl_stateid.si_fileid,
2411                     dp->dl_stateid.si_generation);
2412out:
2413        if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2414                        && flag == NFS4_OPEN_DELEGATE_NONE
2415                        && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
2416                dprintk("NFSD: WARNING: refusing delegation reclaim\n");
2417        open->op_delegate_type = flag;
2418}
2419
2420/*
2421 * called with nfs4_lock_state() held.
2422 */
2423__be32
2424nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
2425{
2426        struct nfsd4_compoundres *resp = rqstp->rq_resp;
2427        struct nfs4_file *fp = NULL;
2428        struct inode *ino = current_fh->fh_dentry->d_inode;
2429        struct nfs4_stateid *stp = NULL;
2430        struct nfs4_delegation *dp = NULL;
2431        __be32 status;
2432
2433        status = nfserr_inval;
2434        if (!access_valid(open->op_share_access, resp->cstate.minorversion)
2435                        || !deny_valid(open->op_share_deny))
2436                goto out;
2437        /*
2438         * Lookup file; if found, lookup stateid and check open request,
2439         * and check for delegations in the process of being recalled.
2440         * If not found, create the nfs4_file struct
2441         */
2442        fp = find_file(ino);
2443        if (fp) {
2444                if ((status = nfs4_check_open(fp, open, &stp)))
2445                        goto out;
2446                status = nfs4_check_deleg(fp, open, &dp);
2447                if (status)
2448                        goto out;
2449        } else {
2450                status = nfserr_bad_stateid;
2451                if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
2452                        goto out;
2453                status = nfserr_resource;
2454                fp = alloc_init_file(ino);
2455                if (fp == NULL)
2456                        goto out;
2457        }
2458
2459        /*
2460         * OPEN the file, or upgrade an existing OPEN.
2461         * If truncate fails, the OPEN fails.
2462         */
2463        if (stp) {
2464                /* Stateid was found, this is an OPEN upgrade */
2465                status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
2466                if (status)
2467                        goto out;
2468                update_stateid(&stp->st_stateid);
2469        } else {
2470                /* Stateid was not found, this is a new OPEN */
2471                int flags = 0;
2472                if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
2473                        flags |= NFSD_MAY_READ;
2474                if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2475                        flags |= NFSD_MAY_WRITE;
2476                status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
2477                if (status)
2478                        goto out;
2479                init_stateid(stp, fp, open);
2480                status = nfsd4_truncate(rqstp, current_fh, open);
2481                if (status) {
2482                        release_open_stateid(stp);
2483                        goto out;
2484                }
2485                if (nfsd4_has_session(&resp->cstate))
2486                        update_stateid(&stp->st_stateid);
2487        }
2488        memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
2489
2490        if (nfsd4_has_session(&resp->cstate))
2491                open->op_stateowner->so_confirmed = 1;
2492
2493        /*
2494        * Attempt to hand out a delegation. No error return, because the
2495        * OPEN succeeds even if we fail.
2496        */
2497        nfs4_open_delegation(current_fh, open, stp);
2498
2499        status = nfs_ok;
2500
2501        dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
2502                    stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
2503                    stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
2504out:
2505        if (fp)
2506                put_nfs4_file(fp);
2507        if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
2508                nfs4_set_claim_prev(open);
2509        /*
2510        * To finish the open response, we just need to set the rflags.
2511        */
2512        open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
2513        if (!open->op_stateowner->so_confirmed &&
2514            !nfsd4_has_session(&resp->cstate))
2515                open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
2516
2517        return status;
2518}
2519
2520__be32
2521nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2522            clientid_t *clid)
2523{
2524        struct nfs4_client *clp;
2525        __be32 status;
2526
2527        nfs4_lock_state();
2528        dprintk("process_renew(%08x/%08x): starting\n", 
2529                        clid->cl_boot, clid->cl_id);
2530        status = nfserr_stale_clientid;
2531        if (STALE_CLIENTID(clid))
2532                goto out;
2533        clp = find_confirmed_client(clid);
2534        status = nfserr_expired;
2535        if (clp == NULL) {
2536                /* We assume the client took too long to RENEW. */
2537                dprintk("nfsd4_renew: clientid not found!\n");
2538                goto out;
2539        }
2540        renew_client(clp);
2541        status = nfserr_cb_path_down;
2542        if (!list_empty(&clp->cl_delegations)
2543                        && !atomic_read(&clp->cl_cb_conn.cb_set))
2544                goto out;
2545        status = nfs_ok;
2546out:
2547        nfs4_unlock_state();
2548        return status;
2549}
2550
2551struct lock_manager nfsd4_manager = {
2552};
2553
2554static void
2555nfsd4_end_grace(void)
2556{
2557        dprintk("NFSD: end of grace period\n");
2558        nfsd4_recdir_purge_old();
2559        locks_end_grace(&nfsd4_manager);
2560}
2561
2562static time_t
2563nfs4_laundromat(void)
2564{
2565        struct nfs4_client *clp;
2566        struct nfs4_stateowner *sop;
2567        struct nfs4_delegation *dp;
2568        struct list_head *pos, *next, reaplist;
2569        time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
2570        time_t t, clientid_val = NFSD_LEASE_TIME;
2571        time_t u, test_val = NFSD_LEASE_TIME;
2572
2573        nfs4_lock_state();
2574
2575        dprintk("NFSD: laundromat service - starting\n");
2576        if (locks_in_grace())
2577                nfsd4_end_grace();
2578        list_for_each_safe(pos, next, &client_lru) {
2579                clp = list_entry(pos, struct nfs4_client, cl_lru);
2580                if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
2581                        t = clp->cl_time - cutoff;
2582                        if (clientid_val > t)
2583                                clientid_val = t;
2584                        break;
2585                }
2586                dprintk("NFSD: purging unused client (clientid %08x)\n",
2587                        clp->cl_clientid.cl_id);
2588                nfsd4_remove_clid_dir(clp);
2589                expire_client(clp);
2590        }
2591        INIT_LIST_HEAD(&reaplist);
2592        spin_lock(&recall_lock);
2593        list_for_each_safe(pos, next, &del_recall_lru) {
2594                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
2595                if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
2596                        u = dp->dl_time - cutoff;
2597                        if (test_val > u)
2598                                test_val = u;
2599                        break;
2600                }
2601                dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
2602                                    dp, dp->dl_flock);
2603                list_move(&dp->dl_recall_lru, &reaplist);
2604        }
2605        spin_unlock(&recall_lock);
2606        list_for_each_safe(pos, next, &reaplist) {
2607                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
2608                list_del_init(&dp->dl_recall_lru);
2609                unhash_delegation(dp);
2610        }
2611        test_val = NFSD_LEASE_TIME;
2612        list_for_each_safe(pos, next, &close_lru) {
2613                sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
2614                if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
2615                        u = sop->so_time - cutoff;
2616                        if (test_val > u)
2617                                test_val = u;
2618                        break;
2619                }
2620                dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
2621                        sop->so_id);
2622                release_openowner(sop);
2623        }
2624        if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
2625                clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
2626        nfs4_unlock_state();
2627        return clientid_val;
2628}
2629
2630static struct workqueue_struct *laundry_wq;
2631static void laundromat_main(struct work_struct *);
2632static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
2633
2634static void
2635laundromat_main(struct work_struct *not_used)
2636{
2637        time_t t;
2638
2639        t = nfs4_laundromat();
2640        dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
2641        queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
2642}
2643
2644static struct nfs4_stateowner *
2645search_close_lru(u32 st_id, int flags)
2646{
2647        struct nfs4_stateowner *local = NULL;
2648
2649        if (flags & CLOSE_STATE) {
2650                list_for_each_entry(local, &close_lru, so_close_lru) {
2651                        if (local->so_id == st_id)
2652                                return local;
2653                }
2654        }
2655        return NULL;
2656}
2657
2658static inline int
2659nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
2660{
2661        return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
2662}
2663
2664static int
2665STALE_STATEID(stateid_t *stateid)
2666{
2667        if (time_after((unsigned long)boot_time,
2668                        (unsigned long)stateid->si_boot)) {
2669                dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
2670                        stateid->si_boot, stateid->si_stateownerid,
2671                        stateid->si_fileid, stateid->si_generation);
2672                return 1;
2673        }
2674        return 0;
2675}
2676
2677static int
2678EXPIRED_STATEID(stateid_t *stateid)
2679{
2680        if (time_before((unsigned long)boot_time,
2681                        ((unsigned long)stateid->si_boot)) &&
2682            time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2683                dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
2684                        stateid->si_boot, stateid->si_stateownerid,
2685                        stateid->si_fileid, stateid->si_generation);
2686                return 1;
2687        }
2688        return 0;
2689}
2690
2691static __be32
2692stateid_error_map(stateid_t *stateid)
2693{
2694        if (STALE_STATEID(stateid))
2695                return nfserr_stale_stateid;
2696        if (EXPIRED_STATEID(stateid))
2697                return nfserr_expired;
2698
2699        dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
2700                stateid->si_boot, stateid->si_stateownerid,
2701                stateid->si_fileid, stateid->si_generation);
2702        return nfserr_bad_stateid;
2703}
2704
2705static inline int
2706access_permit_read(unsigned long access_bmap)
2707{
2708        return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
2709                test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) ||
2710                test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap);
2711}
2712
2713static inline int
2714access_permit_write(unsigned long access_bmap)
2715{
2716        return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
2717                test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
2718}
2719
2720static
2721__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
2722{
2723        __be32 status = nfserr_openmode;
2724
2725        if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
2726                goto out;
2727        if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
2728                goto out;
2729        status = nfs_ok;
2730out:
2731        return status;
2732}
2733
2734static inline __be32
2735check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
2736{
2737        if (ONE_STATEID(stateid) && (flags & RD_STATE))
2738                return nfs_ok;
2739        else if (locks_in_grace()) {
2740                /* Answer in remaining cases depends on existance of
2741                 * conflicting state; so we must wait out the grace period. */
2742                return nfserr_grace;
2743        } else if (flags & WR_STATE)
2744                return nfs4_share_conflict(current_fh,
2745                                NFS4_SHARE_DENY_WRITE);
2746        else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
2747                return nfs4_share_conflict(current_fh,
2748                                NFS4_SHARE_DENY_READ);
2749}
2750
/*
 * During the grace period, READ/WRITE on recovered state is allowed only
 * for files that cannot be subject to mandatory locking.  Returns 1 when
 * I/O on @inode must be refused for that reason, 0 otherwise.
 */
static inline int
grace_disallows_io(struct inode *inode)
{
        if (!locks_in_grace())
                return 0;
        return mandatory_lock(inode) ? 1 : 0;
}
2760
2761static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags)
2762{
2763        /*
2764         * When sessions are used the stateid generation number is ignored
2765         * when it is zero.
2766         */
2767        if ((flags & HAS_SESSION) && in->si_generation == 0)
2768                goto out;
2769
2770        /* If the client sends us a stateid from the future, it's buggy: */
2771        if (in->si_generation > ref->si_generation)
2772                return nfserr_bad_stateid;
2773        /*
2774         * The following, however, can happen.  For example, if the
2775         * client sends an open and some IO at the same time, the open
2776         * may bump si_generation while the IO is still in flight.
2777         * Thanks to hard links and renames, the client never knows what
2778         * file an open will affect.  So it could avoid that situation
2779         * only by serializing all opens and IO from the same open
2780         * owner.  To recover from the old_stateid error, the client
2781         * will just have to retry the IO:
2782         */
2783        if (in->si_generation < ref->si_generation)
2784                return nfserr_old_stateid;
2785out:
2786        return nfs_ok;
2787}
2788
2789static int is_delegation_stateid(stateid_t *stateid)
2790{
2791        return stateid->si_fileid == 0;
2792}
2793
2794/*
2795* Checks for stateid operations
2796*/
2797__be32
2798nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2799                           stateid_t *stateid, int flags, struct file **filpp)
2800{
2801        struct nfs4_stateid *stp = NULL;
2802        struct nfs4_delegation *dp = NULL;
2803        struct svc_fh *current_fh = &cstate->current_fh;
2804        struct inode *ino = current_fh->fh_dentry->d_inode;
2805        __be32 status;
2806
2807        if (filpp)
2808                *filpp = NULL;
2809
2810        if (grace_disallows_io(ino))
2811                return nfserr_grace;
2812
2813        if (nfsd4_has_session(cstate))
2814                flags |= HAS_SESSION;
2815
2816        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
2817                return check_special_stateids(current_fh, stateid, flags);
2818
2819        status = nfserr_stale_stateid;
2820        if (STALE_STATEID(stateid)) 
2821                goto out;
2822
2823        status = nfserr_bad_stateid;
2824        if (is_delegation_stateid(stateid)) {
2825                dp = find_delegation_stateid(ino, stateid);
2826                if (!dp) {
2827                        status = stateid_error_map(stateid);
2828                        goto out;
2829                }
2830                status = check_stateid_generation(stateid, &dp->dl_stateid,
2831                                                  flags);
2832                if (status)
2833                        goto out;
2834                status = nfs4_check_delegmode(dp, flags);
2835                if (status)
2836                        goto out;
2837                renew_client(dp->dl_client);
2838                if (filpp)
2839                        *filpp = dp->dl_vfs_file;
2840        } else { /* open or lock stateid */
2841                stp = find_stateid(stateid, flags);
2842                if (!stp) {
2843                        status = stateid_error_map(stateid);
2844                        goto out;
2845                }
2846                if (nfs4_check_fh(current_fh, stp))
2847                        goto out;
2848                if (!stp->st_stateowner->so_confirmed)
2849                        goto out;
2850                status = check_stateid_generation(stateid, &stp->st_stateid,
2851                                                  flags);
2852                if (status)
2853                        goto out;
2854                status = nfs4_check_openmode(stp, flags);
2855                if (status)
2856                        goto out;
2857                renew_client(stp->st_stateowner->so_client);
2858                if (filpp)
2859                        *filpp = stp->st_vfs_file;
2860        }
2861        status = nfs_ok;
2862out:
2863        return status;
2864}
2865
2866static inline int
2867setlkflg (int type)
2868{
2869        return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
2870                RD_STATE : WR_STATE;
2871}
2872
2873/* 
2874 * Checks for sequence id mutating operations. 
2875 */
2876static __be32
2877nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2878                         stateid_t *stateid, int flags,
2879                         struct nfs4_stateowner **sopp,
2880                         struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
2881{
2882        struct nfs4_stateid *stp;
2883        struct nfs4_stateowner *sop;
2884        struct svc_fh *current_fh = &cstate->current_fh;
2885        __be32 status;
2886
2887        dprintk("NFSD: preprocess_seqid_op: seqid=%d " 
2888                        "stateid = (%08x/%08x/%08x/%08x)\n", seqid,
2889                stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2890                stateid->si_generation);
2891
2892        *stpp = NULL;
2893        *sopp = NULL;
2894
2895        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
2896                dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
2897                return nfserr_bad_stateid;
2898        }
2899
2900        if (STALE_STATEID(stateid))
2901                return nfserr_stale_stateid;
2902
2903        if (nfsd4_has_session(cstate))
2904                flags |= HAS_SESSION;
2905
2906        /*
2907        * We return BAD_STATEID if filehandle doesn't match stateid, 
2908        * the confirmed flag is incorrecly set, or the generation 
2909        * number is incorrect.  
2910        */
2911        stp = find_stateid(stateid, flags);
2912        if (stp == NULL) {
2913                /*
2914                 * Also, we should make sure this isn't just the result of
2915                 * a replayed close:
2916                 */
2917                sop = search_close_lru(stateid->si_stateownerid, flags);
2918                if (sop == NULL)
2919                        return stateid_error_map(stateid);
2920                *sopp = sop;
2921                goto check_replay;
2922        }
2923
2924        *stpp = stp;
2925        *sopp = sop = stp->st_stateowner;
2926
2927        if (lock) {
2928                clientid_t *lockclid = &lock->v.new.clientid;
2929                struct nfs4_client *clp = sop->so_client;
2930                int lkflg = 0;
2931                __be32 status;
2932
2933                lkflg = setlkflg(lock->lk_type);
2934
2935                if (lock->lk_is_new) {
2936                        if (!sop->so_is_open_owner)
2937                                return nfserr_bad_stateid;
2938                        if (!(flags & HAS_SESSION) &&
2939                            !same_clid(&clp->cl_clientid, lockclid))
2940                                return nfserr_bad_stateid;
2941                        /* stp is the open stateid */
2942                        status = nfs4_check_openmode(stp, lkflg);
2943                        if (status)
2944                                return status;
2945                } else {
2946                        /* stp is the lock stateid */
2947                        status = nfs4_check_openmode(stp->st_openstp, lkflg);
2948                        if (status)
2949                                return status;
2950               }
2951        }
2952
2953        if (nfs4_check_fh(current_fh, stp)) {
2954                dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
2955                return nfserr_bad_stateid;
2956        }
2957
2958        /*
2959        *  We now validate the seqid and stateid generation numbers.
2960        *  For the moment, we ignore the possibility of 
2961        *  generation number wraparound.
2962        */
2963        if (!(flags & HAS_SESSION) && seqid != sop->so_seqid)
2964                goto check_replay;
2965
2966        if (sop->so_confirmed && flags & CONFIRM) {
2967                dprintk("NFSD: preprocess_seqid_op: expected"
2968                                " unconfirmed stateowner!\n");
2969                return nfserr_bad_stateid;
2970        }
2971        if (!sop->so_confirmed && !(flags & CONFIRM)) {
2972                dprintk("NFSD: preprocess_seqid_op: stateowner not"
2973                                " confirmed yet!\n");
2974                return nfserr_bad_stateid;
2975        }
2976        status = check_stateid_generation(stateid, &stp->st_stateid, flags);
2977        if (status)
2978                return status;
2979        renew_client(sop->so_client);
2980        return nfs_ok;
2981
2982check_replay:
2983        if (seqid == sop->so_seqid - 1) {
2984                dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
2985                /* indicate replay to calling function */
2986                return nfserr_replay_me;
2987        }
2988        dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
2989                        sop->so_seqid, seqid);
2990        *sopp = NULL;
2991        return nfserr_bad_seqid;
2992}
2993
2994__be32
2995nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2996                   struct nfsd4_open_confirm *oc)
2997{
2998        __be32 status;
2999        struct nfs4_stateowner *sop;
3000        struct nfs4_stateid *stp;
3001
3002        dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
3003                        (int)cstate->current_fh.fh_dentry->d_name.len,
3004                        cstate->current_fh.fh_dentry->d_name.name);
3005
3006        status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
3007        if (status)
3008                return status;
3009
3010        nfs4_lock_state();
3011
3012        if ((status = nfs4_preprocess_seqid_op(cstate,
3013                                        oc->oc_seqid, &oc->oc_req_stateid,
3014                                        CONFIRM | OPEN_STATE,
3015                                        &oc->oc_stateowner, &stp, NULL)))
3016                goto out; 
3017
3018        sop = oc->oc_stateowner;
3019        sop->so_confirmed = 1;
3020        update_stateid(&stp->st_stateid);
3021        memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
3022        dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 
3023                "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
3024                         stp->st_stateid.si_boot,
3025                         stp->st_stateid.si_stateownerid,
3026                         stp->st_stateid.si_fileid,
3027                         stp->st_stateid.si_generation);
3028
3029        nfsd4_create_clid_dir(sop->so_client);
3030out:
3031        if (oc->oc_stateowner) {
3032                nfs4_get_stateowner(oc->oc_stateowner);
3033                cstate->replay_owner = oc->oc_stateowner;
3034        }
3035        nfs4_unlock_state();
3036        return status;
3037}
3038
3039
/*
 * After a successful OPEN_DOWNGRADE, clear from the union access bitmap
 * every bit 1..3 whose value is not fully contained in the new share
 * @access value.
 */
static void
reset_union_bmap_access(unsigned long access, unsigned long *bmap)
{
        int bit = 1;

        while (bit < 4) {
                if ((bit & access) != bit)
                        __clear_bit(bit, bmap);
                bit++;
        }
}
3053
/*
 * Deny-mode counterpart of reset_union_bmap_access(): clear from the
 * union deny bitmap every bit 0..3 whose value is not fully contained in
 * the new @deny value.
 */
static void
reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
{
        int bit = 0;

        while (bit < 4) {
                if ((bit & deny) != bit)
                        __clear_bit(bit, bmap);
                bit++;
        }
}
3063
3064__be32
3065nfsd4_open_downgrade(struct svc_rqst *rqstp,
3066                     struct nfsd4_compound_state *cstate,
3067                     struct nfsd4_open_downgrade *od)
3068{
3069        __be32 status;
3070        struct nfs4_stateid *stp;
3071        unsigned int share_access;
3072
3073        dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
3074                        (int)cstate->current_fh.fh_dentry->d_name.len,
3075                        cstate->current_fh.fh_dentry->d_name.name);
3076
3077        if (!access_valid(od->od_share_access, cstate->minorversion)
3078                        || !deny_valid(od->od_share_deny))
3079                return nfserr_inval;
3080
3081        nfs4_lock_state();
3082        if ((status = nfs4_preprocess_seqid_op(cstate,
3083                                        od->od_seqid,
3084                                        &od->od_stateid, 
3085                                        OPEN_STATE,
3086                                        &od->od_stateowner, &stp, NULL)))
3087                goto out; 
3088
3089        status = nfserr_inval;
3090        if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
3091                dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
3092                        stp->st_access_bmap, od->od_share_access);
3093                goto out;
3094        }
3095        if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
3096                dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
3097                        stp->st_deny_bmap, od->od_share_deny);
3098                goto out;
3099        }
3100        set_access(&share_access, stp->st_access_bmap);
3101        nfs4_file_downgrade(stp->st_vfs_file,
3102                            share_access & ~od->od_share_access);
3103
3104        reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
3105        reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
3106
3107        update_stateid(&stp->st_stateid);
3108        memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
3109        status = nfs_ok;
3110out:
3111        if (od->od_stateowner) {
3112                nfs4_get_stateowner(od->od_stateowner);
3113                cstate->replay_owner = od->od_stateowner;
3114        }
3115        nfs4_unlock_state();
3116        return status;
3117}
3118
3119/*
3120 * nfs4_unlock_state() called after encode
3121 */
3122__be32
3123nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3124            struct nfsd4_close *close)
3125{
3126        __be32 status;
3127        struct nfs4_stateid *stp;
3128
3129        dprintk("NFSD: nfsd4_close on file %.*s\n", 
3130                        (int)cstate->current_fh.fh_dentry->d_name.len,
3131                        cstate->current_fh.fh_dentry->d_name.name);
3132
3133        nfs4_lock_state();
3134        /* check close_lru for replay */
3135        if ((status = nfs4_preprocess_seqid_op(cstate,
3136                                        close->cl_seqid,
3137                                        &close->cl_stateid, 
3138                                        OPEN_STATE | CLOSE_STATE,
3139                                        &close->cl_stateowner, &stp, NULL)))
3140                goto out; 
3141        status = nfs_ok;
3142        update_stateid(&stp->st_stateid);
3143        memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
3144
3145        /* release_stateid() calls nfsd_close() if needed */
3146        release_open_stateid(stp);
3147
3148        /* place unused nfs4_stateowners on so_close_lru list to be
3149         * released by the laundromat service after the lease period
3150         * to enable us to handle CLOSE replay
3151         */
3152        if (list_empty(&close->cl_stateowner->so_stateids))
3153                move_to_close_lru(close->cl_stateowner);
3154out:
3155        if (close->cl_stateowner) {
3156                nfs4_get_stateowner(close->cl_stateowner);
3157                cstate->replay_owner = close->cl_stateowner;
3158        }
3159        nfs4_unlock_state();
3160        return status;
3161}
3162
3163__be32
3164nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3165                  struct nfsd4_delegreturn *dr)
3166{
3167        struct nfs4_delegation *dp;
3168        stateid_t *stateid = &dr->dr_stateid;
3169        struct inode *inode;
3170        __be32 status;
3171        int flags = 0;
3172
3173        if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
3174                return status;
3175        inode = cstate->current_fh.fh_dentry->d_inode;
3176
3177        if (nfsd4_has_session(cstate))
3178                flags |= HAS_SESSION;
3179        nfs4_lock_state();
3180        status = nfserr_bad_stateid;
3181        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3182                goto out;
3183        status = nfserr_stale_stateid;
3184        if (STALE_STATEID(stateid))
3185                goto out;
3186        status = nfserr_bad_stateid;
3187        if (!is_delegation_stateid(stateid))
3188                goto out;
3189        dp = find_delegation_stateid(inode, stateid);
3190        if (!dp) {
3191                status = stateid_error_map(stateid);
3192                goto out;
3193        }
3194        status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
3195        if (status)
3196                goto out;
3197        renew_client(dp->dl_client);
3198
3199        unhash_delegation(dp);
3200out:
3201        nfs4_unlock_state();
3202
3203        return status;
3204}
3205
3206
3207/* 
3208 * Lock owner state (byte-range locks)
3209 */
3210#define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
3211#define LOCK_HASH_BITS              8
3212#define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
3213#define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
3214
3215static inline u64
3216end_offset(u64 start, u64 len)
3217{
3218        u64 end;
3219
3220        end = start + len;
3221        return end >= start ? end: NFS4_MAX_UINT64;
3222}
3223
3224/* last octet in a range */
3225static inline u64
3226last_byte_offset(u64 start, u64 len)
3227{
3228        u64 end;
3229
3230        BUG_ON(!len);
3231        end = start + len;
3232        return end > start ? end - 1: NFS4_MAX_UINT64;
3233}
3234
3235#define lockownerid_hashval(id) \
3236        ((id) & LOCK_HASH_MASK)
3237
3238static inline unsigned int
3239lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
3240                struct xdr_netobj *ownername)
3241{
3242        return (file_hashval(inode) + cl_id
3243                        + opaque_hashval(ownername->data, ownername->len))
3244                & LOCK_HASH_MASK;
3245}
3246
3247static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
3248static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
3249static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
3250
3251static struct nfs4_stateid *
3252find_stateid(stateid_t *stid, int flags)
3253{
3254        struct nfs4_stateid *local;
3255        u32 st_id = stid->si_stateownerid;
3256        u32 f_id = stid->si_fileid;
3257        unsigned int hashval;
3258
3259        dprintk("NFSD: find_stateid flags 0x%x\n",flags);
3260        if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) {
3261                hashval = stateid_hashval(st_id, f_id);
3262                list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
3263                        if ((local->st_stateid.si_stateownerid == st_id) &&
3264                            (local->st_stateid.si_fileid == f_id))
3265                                return local;
3266                }
3267        } 
3268
3269        if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) {
3270                hashval = stateid_hashval(st_id, f_id);
3271                list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
3272                        if ((local->st_stateid.si_stateownerid == st_id) &&
3273                            (local->st_stateid.si_fileid == f_id))
3274                                return local;
3275                }
3276        }
3277        return NULL;
3278}
3279
3280static struct nfs4_delegation *
3281find_delegation_stateid(struct inode *ino, stateid_t *stid)
3282{
3283        struct nfs4_file *fp;
3284        struct nfs4_delegation *dl;
3285
3286        dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
3287                    stid->si_boot, stid->si_stateownerid,
3288                    stid->si_fileid, stid->si_generation);
3289
3290        fp = find_file(ino);
3291        if (!fp)
3292                return NULL;
3293        dl = find_delegation_file(fp, stid);
3294        put_nfs4_file(fp);
3295        return dl;
3296}
3297
3298/*
3299 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
3300 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
3301 * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
3302 * locking, this prevents us from being completely protocol-compliant.  The
3303 * real solution to this problem is to start using unsigned file offsets in
3304 * the VFS, but this is a very deep change!
3305 */
3306static inline void
3307nfs4_transform_lock_offset(struct file_lock *lock)
3308{
3309        if (lock->fl_start < 0)
3310                lock->fl_start = OFFSET_MAX;
3311        if (lock->fl_end < 0)
3312                lock->fl_end = OFFSET_MAX;
3313}
3314
3315/* Hack!: For now, we're defining this just so we can use a pointer to it
3316 * as a unique cookie to identify our (NFSv4's) posix locks. */
3317static const struct lock_manager_operations nfsd_posix_mng_ops  = {
3318};
3319
3320static inline void
3321nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
3322{
3323        struct nfs4_stateowner *sop;
3324        unsigned int hval;
3325
3326        if (fl->fl_lmops == &nfsd_posix_mng_ops) {
3327                sop = (struct nfs4_stateowner *) fl->fl_owner;
3328                hval = lockownerid_hashval(sop->so_id);
3329                kref_get(&sop->so_ref);
3330                deny->ld_sop = sop;
3331                deny->ld_clientid = sop->so_client->cl_clientid;
3332        } else {
3333                deny->ld_sop = NULL;
3334                deny->ld_clientid.cl_boot = 0;
3335                deny->ld_clientid.cl_id = 0;
3336        }
3337        deny->ld_start = fl->fl_start;
3338        deny->ld_length = NFS4_MAX_UINT64;
3339        if (fl->fl_end != NFS4_MAX_UINT64)
3340                deny->ld_length = fl->fl_end - fl->fl_start + 1;        
3341        deny->ld_type = NFS4_READ_LT;
3342        if (fl->fl_type != F_RDLCK)
3343                deny->ld_type = NFS4_WRITE_LT;
3344}
3345
3346static struct nfs4_stateowner *
3347find_lockstateowner_str(struct inode *inode, clientid_t *clid,
3348                struct xdr_netobj *owner)
3349{
3350        unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
3351        struct nfs4_stateowner *op;
3352
3353        list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
3354                if (same_owner_str(op, owner, clid))
3355                        return op;
3356        }
3357        return NULL;
3358}
3359
3360/*
3361 * Alloc a lock owner structure.
3362 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 
3363 * occured. 
3364 *
3365 * strhashval = lock_ownerstr_hashval 
3366 */
3367
3368static struct nfs4_stateowner *
3369alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) {
3370        struct nfs4_stateowner *sop;
3371        struct nfs4_replay *rp;
3372        unsigned int idhashval;
3373
3374        if (!(sop = alloc_stateowner(&lock->lk_new_owner)))
3375                return NULL;
3376        idhashval = lockownerid_hashval(current_ownerid);
3377        INIT_LIST_HEAD(&sop->so_idhash);
3378        INIT_LIST_HEAD(&sop->so_strhash);
3379        INIT_LIST_HEAD(&sop->so_perclient);
3380        INIT_LIST_HEAD(&sop->so_stateids);
3381        INIT_LIST_HEAD(&sop->so_perstateid);
3382        INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
3383        sop->so_time = 0;
3384        list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
3385        list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
3386        list_add(&sop->so_perstateid, &open_stp->st_lockowners);
3387        sop->so_is_open_owner = 0;
3388        sop->so_id = current_ownerid++;
3389        sop->so_client = clp;
3390        /* It is the openowner seqid that will be incremented in encode in the
3391         * case of new lockowners; so increment the lock seqid manually: */
3392        sop->so_seqid = lock->lk_new_lock_seqid + 1;
3393        sop->so_confirmed = 1;
3394        rp = &sop->so_replay;
3395        rp->rp_status = nfserr_serverfault;
3396        rp->rp_buflen = 0;
3397        rp->rp_buf = rp->rp_ibuf;
3398        return sop;
3399}
3400
3401static struct nfs4_stateid *
3402alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
3403{
3404        struct nfs4_stateid *stp;
3405        unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
3406
3407        stp = nfs4_alloc_stateid();
3408        if (stp == NULL)
3409                goto out;
3410        INIT_LIST_HEAD(&stp->st_hash);
3411        INIT_LIST_HEAD(&stp->st_perfile);
3412        INIT_LIST_HEAD(&stp->st_perstateowner);
3413        INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
3414        list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
3415        list_add(&stp->st_perfile, &fp->fi_stateids);
3416        list_add(&stp->st_perstateowner, &sop->so_stateids);
3417        stp->st_stateowner = sop;
3418        get_nfs4_file(fp);
3419        stp->st_file = fp;
3420        stp->st_stateid.si_boot = get_seconds();
3421        stp->st_stateid.si_stateownerid = sop->so_id;
3422        stp->st_stateid.si_fileid = fp->fi_id;
3423        stp->st_stateid.si_generation = 0;
3424        stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
3425        stp->st_access_bmap = open_stp->st_access_bmap;
3426        stp->st_deny_bmap = open_stp->st_deny_bmap;
3427        stp->st_openstp = open_stp;
3428
3429out:
3430        return stp;
3431}
3432
3433static int
3434check_lock_length(u64 offset, u64 length)
3435{
3436        return ((length == 0)  || ((length != NFS4_MAX_UINT64) &&
3437             LOFF_OVERFLOW(offset, length)));
3438}
3439
3440/*
3441 *  LOCK operation 
3442 */
3443__be32
3444nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3445           struct nfsd4_lock *lock)
3446{
3447        struct nfs4_stateowner *open_sop = NULL;
3448        struct nfs4_stateowner *lock_sop = NULL;
3449        struct nfs4_stateid *lock_stp;
3450        struct file *filp;
3451        struct file_lock file_lock;
3452        struct file_lock conflock;
3453        __be32 status = 0;
3454        unsigned int strhashval;
3455        unsigned int cmd;
3456        int err;
3457
3458        dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
3459                (long long) lock->lk_offset,
3460                (long long) lock->lk_length);
3461
3462        if (check_lock_length(lock->lk_offset, lock->lk_length))
3463                 return nfserr_inval;
3464
3465        if ((status = fh_verify(rqstp, &cstate->current_fh,
3466                                S_IFREG, NFSD_MAY_LOCK))) {
3467                dprintk("NFSD: nfsd4_lock: permission denied!\n");
3468                return status;
3469        }
3470
3471        nfs4_lock_state();
3472
3473        if (lock->lk_is_new) {
3474                /*
3475                 * Client indicates that this is a new lockowner.
3476                 * Use open owner and open stateid to create lock owner and
3477                 * lock stateid.
3478                 */
3479                struct nfs4_stateid *open_stp = NULL;
3480                struct nfs4_file *fp;
3481                
3482                status = nfserr_stale_clientid;
3483                if (!nfsd4_has_session(cstate) &&
3484                    STALE_CLIENTID(&lock->lk_new_clientid))
3485                        goto out;
3486
3487                /* validate and update open stateid and open seqid */
3488                status = nfs4_preprocess_seqid_op(cstate,
3489                                        lock->lk_new_open_seqid,
3490                                        &lock->lk_new_open_stateid,
3491                                        OPEN_STATE,
3492                                        &lock->lk_replay_owner, &open_stp,
3493                                        lock);
3494                if (status)
3495                        goto out;
3496                open_sop = lock->lk_replay_owner;
3497                /* create lockowner and lock stateid */
3498                fp = open_stp->st_file;
3499                strhashval = lock_ownerstr_hashval(fp->fi_inode, 
3500                                open_sop->so_client->cl_clientid.cl_id, 
3501                                &lock->v.new.owner);
3502                /* XXX: Do we need to check for duplicate stateowners on
3503                 * the same file, or should they just be allowed (and
3504                 * create new stateids)? */
3505                status = nfserr_resource;
3506                lock_sop = alloc_init_lock_stateowner(strhashval,
3507                                open_sop->so_client, open_stp, lock);
3508                if (lock_sop == NULL)
3509                        goto out;
3510                lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
3511                if (lock_stp == NULL)
3512                        goto out;
3513        } else {
3514                /* lock (lock owner + lock stateid) already exists */
3515                status = nfs4_preprocess_seqid_op(cstate,
3516                                       lock->lk_old_lock_seqid, 
3517                                       &lock->lk_old_lock_stateid, 
3518                                       LOCK_STATE,
3519                                       &lock->lk_replay_owner, &lock_stp, lock);
3520                if (status)
3521                        goto out;
3522                lock_sop = lock->lk_replay_owner;
3523        }
3524        /* lock->lk_replay_owner and lock_stp have been created or found */
3525        filp = lock_stp->st_vfs_file;
3526
3527        status = nfserr_grace;
3528        if (locks_in_grace() && !lock->lk_reclaim)
3529                goto out;
3530        status = nfserr_no_grace;
3531        if (!locks_in_grace() && lock->lk_reclaim)
3532                goto out;
3533
3534        locks_init_lock(&file_lock);
3535        switch (lock->lk_type) {
3536                case NFS4_READ_LT:
3537                case NFS4_READW_LT:
3538                        file_lock.fl_type = F_RDLCK;
3539                        cmd = F_SETLK;
3540                break;
3541                case NFS4_WRITE_LT:
3542                case NFS4_WRITEW_LT:
3543                        file_lock.fl_type = F_WRLCK;
3544                        cmd = F_SETLK;
3545                break;
3546                default:
3547                        status = nfserr_inval;
3548                goto out;
3549        }
3550        file_lock.fl_owner = (fl_owner_t)lock_sop;
3551        file_lock.fl_pid = current->tgid;
3552        file_lock.fl_file = filp;
3553        file_lock.fl_flags = FL_POSIX;
3554        file_lock.fl_lmops = &nfsd_posix_mng_ops;
3555
3556        file_lock.fl_start = lock->lk_offset;
3557        file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
3558        nfs4_transform_lock_offset(&file_lock);
3559
3560        /*
3561        * Try to lock the file in the VFS.
3562        * Note: locks.c uses the BKL to protect the inode's lock list.
3563        */
3564
3565        err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
3566        switch (-err) {
3567        case 0: /* success! */
3568                update_stateid(&lock_stp->st_stateid);
3569                memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, 
3570                                sizeof(stateid_t));
3571                status = 0;
3572                break;
3573        case (EAGAIN):          /* conflock holds conflicting lock */
3574                status = nfserr_denied;
3575                dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
3576                nfs4_set_lock_denied(&conflock, &lock->lk_denied);
3577                break;
3578        case (EDEADLK):
3579                status = nfserr_deadlock;
3580                break;
3581        default:        
3582                dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
3583                status = nfserr_resource;
3584                break;
3585        }
3586out:
3587        if (status && lock->lk_is_new && lock_sop)
3588                release_lockowner(lock_sop);
3589        if (lock->lk_replay_owner) {
3590                nfs4_get_stateowner(lock->lk_replay_owner);
3591                cstate->replay_owner = lock->lk_replay_owner;
3592        }
3593        nfs4_unlock_state();
3594        return status;
3595}
3596
3597/*
3598 * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
3599 * so we do a temporary open here just to get an open file to pass to
3600 * vfs_test_lock.  (Arguably perhaps test_lock should be done with an
3601 * inode operation.)
3602 */
3603static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
3604{
3605        struct file *file;
3606        int err;
3607
3608        err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
3609        if (err)
3610                return err;
3611        err = vfs_test_lock(file, lock);
3612        nfsd_close(file);
3613        return err;
3614}
3615
3616/*
3617 * LOCKT operation
3618 */
3619__be32
3620nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3621            struct nfsd4_lockt *lockt)
3622{
3623        struct inode *inode;
3624        struct file_lock file_lock;
3625        int error;
3626        __be32 status;
3627
3628        if (locks_in_grace())
3629                return nfserr_grace;
3630
3631        if (check_lock_length(lockt->lt_offset, lockt->lt_length))
3632                 return nfserr_inval;
3633
3634        lockt->lt_stateowner = NULL;
3635        nfs4_lock_state();
3636
3637        status = nfserr_stale_clientid;
3638        if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
3639                goto out;
3640
3641        if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) {
3642                dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
3643                if (status == nfserr_symlink)
3644                        status = nfserr_inval;
3645                goto out;
3646        }
3647
3648        inode = cstate->current_fh.fh_dentry->d_inode;
3649        locks_init_lock(&file_lock);
3650        switch (lockt->lt_type) {
3651                case NFS4_READ_LT:
3652                case NFS4_READW_LT:
3653                        file_lock.fl_type = F_RDLCK;
3654                break;
3655                case NFS4_WRITE_LT:
3656                case NFS4_WRITEW_LT:
3657                        file_lock.fl_type = F_WRLCK;
3658                break;
3659                default:
3660                        dprintk("NFSD: nfs4_lockt: bad lock type!\n");
3661                        status = nfserr_inval;
3662                goto out;
3663        }
3664
3665        lockt->lt_stateowner = find_lockstateowner_str(inode,
3666                        &lockt->lt_clientid, &lockt->lt_owner);
3667        if (lockt->lt_stateowner)
3668                file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
3669        file_lock.fl_pid = current->tgid;
3670        file_lock.fl_flags = FL_POSIX;
3671
3672        file_lock.fl_start = lockt->lt_offset;
3673        file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
3674
3675        nfs4_transform_lock_offset(&file_lock);
3676
3677        status = nfs_ok;
3678        error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
3679        if (error) {
3680                status = nfserrno(error);
3681                goto out;
3682        }
3683        if (file_lock.fl_type != F_UNLCK) {
3684                status = nfserr_denied;
3685                nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
3686        }
3687out:
3688        nfs4_unlock_state();
3689        return status;
3690}
3691
3692__be32
3693nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3694            struct nfsd4_locku *locku)
3695{
3696        struct nfs4_stateid *stp;
3697        struct file *filp = NULL;
3698        struct file_lock file_lock;
3699        __be32 status;
3700        int err;
3701                                                        
3702        dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
3703                (long long) locku->lu_offset,
3704                (long long) locku->lu_length);
3705
3706        if (check_lock_length(locku->lu_offset, locku->lu_length))
3707                 return nfserr_inval;
3708
3709        nfs4_lock_state();
3710                                                                                
3711        if ((status = nfs4_preprocess_seqid_op(cstate,
3712                                        locku->lu_seqid, 
3713                                        &locku->lu_stateid, 
3714                                        LOCK_STATE,
3715                                        &locku->lu_stateowner, &stp, NULL)))
3716                goto out;
3717
3718        filp = stp->st_vfs_file;
3719        BUG_ON(!filp);
3720        locks_init_lock(&file_lock);
3721        file_lock.fl_type = F_UNLCK;
3722        file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner;
3723        file_lock.fl_pid = current->tgid;
3724        file_lock.fl_file = filp;
3725        file_lock.fl_flags = FL_POSIX; 
3726        file_lock.fl_lmops = &nfsd_posix_mng_ops;
3727        file_lock.fl_start = locku->lu_offset;
3728
3729        file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length);
3730        nfs4_transform_lock_offset(&file_lock);
3731
3732        /*
3733        *  Try to unlock the file in the VFS.
3734        */
3735        err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL);
3736        if (err) {
3737                dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
3738                goto out_nfserr;
3739        }
3740        /*
3741        * OK, unlock succeeded; the only thing left to do is update the stateid.
3742        */
3743        update_stateid(&stp->st_stateid);
3744        memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
3745
3746out:
3747        if (locku->lu_stateowner) {
3748                nfs4_get_stateowner(locku->lu_stateowner);
3749                cstate->replay_owner = locku->lu_stateowner;
3750        }
3751        nfs4_unlock_state();
3752        return status;
3753
3754out_nfserr:
3755        status = nfserrno(err);
3756        goto out;
3757}
3758
3759/*
3760 * returns
3761 *      1: locks held by lockowner
3762 *      0: no locks held by lockowner
3763 */
3764static int
3765check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
3766{
3767        struct file_lock **flpp;
3768        struct inode *inode = filp->f_path.dentry->d_inode;
3769        int status = 0;
3770
3771        lock_kernel();
3772        for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
3773                if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
3774                        status = 1;
3775                        goto out;
3776                }
3777        }
3778out:
3779        unlock_kernel();
3780        return status;
3781}
3782
3783__be32
3784nfsd4_release_lockowner(struct svc_rqst *rqstp,
3785                        struct nfsd4_compound_state *cstate,
3786                        struct nfsd4_release_lockowner *rlockowner)
3787{
3788        clientid_t *clid = &rlockowner->rl_clientid;
3789        struct nfs4_stateowner *sop;
3790        struct nfs4_stateid *stp;
3791        struct xdr_netobj *owner = &rlockowner->rl_owner;
3792        struct list_head matches;
3793        int i;
3794        __be32 status;
3795
3796        dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
3797                clid->cl_boot, clid->cl_id);
3798
3799        /* XXX check for lease expiration */
3800
3801        status = nfserr_stale_clientid;
3802        if (STALE_CLIENTID(clid))
3803                return status;
3804
3805        nfs4_lock_state();
3806
3807        status = nfserr_locks_held;
3808        /* XXX: we're doing a linear search through all the lockowners.
3809         * Yipes!  For now we'll just hope clients aren't really using
3810         * release_lockowner much, but eventually we have to fix these
3811         * data structures. */
3812        INIT_LIST_HEAD(&matches);
3813        for (i = 0; i < LOCK_HASH_SIZE; i++) {
3814                list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
3815                        if (!same_owner_str(sop, owner, clid))
3816                                continue;
3817                        list_for_each_entry(stp, &sop->so_stateids,
3818                                        st_perstateowner) {
3819                                if (check_for_locks(stp->st_vfs_file, sop))
3820                                        goto out;
3821                                /* Note: so_perclient unused for lockowners,
3822                                 * so it's OK to fool with here. */
3823                                list_add(&sop->so_perclient, &matches);
3824                        }
3825                }
3826        }
3827        /* Clients probably won't expect us to return with some (but not all)
3828         * of the lockowner state released; so don't release any until all
3829         * have been checked. */
3830        status = nfs_ok;
3831        while (!list_empty(&matches)) {
3832                sop = list_entry(matches.next, struct nfs4_stateowner,
3833                                                                so_perclient);
3834                /* unhash_stateowner deletes so_perclient only
3835                 * for openowners. */
3836                list_del(&sop->so_perclient);
3837                release_lockowner(sop);
3838        }
3839out:
3840        nfs4_unlock_state();
3841        return status;
3842}
3843
3844static inline struct nfs4_client_reclaim *
3845alloc_reclaim(void)
3846{
3847        return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
3848}
3849
3850int
3851nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
3852{
3853        unsigned int strhashval = clientstr_hashval(name);
3854        struct nfs4_client *clp;
3855
3856        clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id);
3857        return clp ? 1 : 0;
3858}
3859
3860/*
3861 * failure => all reset bets are off, nfserr_no_grace...
3862 */
3863int
3864nfs4_client_to_reclaim(const char *name)
3865{
3866        unsigned int strhashval;
3867        struct nfs4_client_reclaim *crp = NULL;
3868
3869        dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
3870        crp = alloc_reclaim();
3871        if (!crp)
3872                return 0;
3873        strhashval = clientstr_hashval(name);
3874        INIT_LIST_HEAD(&crp->cr_strhash);
3875        list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
3876        memcpy(crp->cr_recdir, name, HEXDIR_LEN);
3877        reclaim_str_hashtbl_size++;
3878        return 1;
3879}
3880
3881static void
3882nfs4_release_reclaim(void)
3883{
3884        struct nfs4_client_reclaim *crp = NULL;
3885        int i;
3886
3887        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3888                while (!list_empty(&reclaim_str_hashtbl[i])) {
3889                        crp = list_entry(reclaim_str_hashtbl[i].next,
3890                                        struct nfs4_client_reclaim, cr_strhash);
3891                        list_del(&crp->cr_strhash);
3892                        kfree(crp);
3893                        reclaim_str_hashtbl_size--;
3894                }
3895        }
3896        BUG_ON(reclaim_str_hashtbl_size);
3897}
3898
3899/*
3900 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
3901static struct nfs4_client_reclaim *
3902nfs4_find_reclaim_client(clientid_t *clid)
3903{
3904        unsigned int strhashval;
3905        struct nfs4_client *clp;
3906        struct nfs4_client_reclaim *crp = NULL;
3907
3908
3909        /* find clientid in conf_id_hashtbl */
3910        clp = find_confirmed_client(clid);
3911        if (clp == NULL)
3912                return NULL;
3913
3914        dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
3915                            clp->cl_name.len, clp->cl_name.data,
3916                            clp->cl_recdir);
3917
3918        /* find clp->cl_name in reclaim_str_hashtbl */
3919        strhashval = clientstr_hashval(clp->cl_recdir);
3920        list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
3921                if (same_name(crp->cr_recdir, clp->cl_recdir)) {
3922                        return crp;
3923                }
3924        }
3925        return NULL;
3926}
3927
3928/*
3929* Called from OPEN. Look for clientid in reclaim list.
3930*/
3931__be32
3932nfs4_check_open_reclaim(clientid_t *clid)
3933{
3934        return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
3935}
3936
3937/* initialization to perform at module load time: */
3938
3939int
3940nfs4_state_init(void)
3941{
3942        int i, status;
3943
3944        status = nfsd4_init_slabs();
3945        if (status)
3946                return status;
3947        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3948                INIT_LIST_HEAD(&conf_id_hashtbl[i]);
3949                INIT_LIST_HEAD(&conf_str_hashtbl[i]);
3950                INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
3951                INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
3952                INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3953        }
3954        for (i = 0; i < SESSION_HASH_SIZE; i++)
3955                INIT_LIST_HEAD(&sessionid_hashtbl[i]);
3956        for (i = 0; i < FILE_HASH_SIZE; i++) {
3957                INIT_LIST_HEAD(&file_hashtbl[i]);
3958        }
3959        for (i = 0; i < OWNER_HASH_SIZE; i++) {
3960                INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
3961                INIT_LIST_HEAD(&ownerid_hashtbl[i]);
3962        }
3963        for (i = 0; i < STATEID_HASH_SIZE; i++) {
3964                INIT_LIST_HEAD(&stateid_hashtbl[i]);
3965                INIT_LIST_HEAD(&lockstateid_hashtbl[i]);
3966        }
3967        for (i = 0; i < LOCK_HASH_SIZE; i++) {
3968                INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
3969                INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
3970        }
3971        memset(&onestateid, ~0, sizeof(stateid_t));
3972        INIT_LIST_HEAD(&close_lru);
3973        INIT_LIST_HEAD(&client_lru);
3974        INIT_LIST_HEAD(&del_recall_lru);
3975        reclaim_str_hashtbl_size = 0;
3976        return 0;
3977}
3978
3979static void
3980nfsd4_load_reboot_recovery_data(void)
3981{
3982        int status;
3983
3984        nfs4_lock_state();
3985        nfsd4_init_recdir(user_recovery_dirname);
3986        status = nfsd4_recdir_load();
3987        nfs4_unlock_state();
3988        if (status)
3989                printk("NFSD: Failure reading reboot recovery data\n");
3990}
3991
3992unsigned long
3993get_nfs4_grace_period(void)
3994{
3995        return max(user_lease_time, lease_time) * HZ;
3996}
3997
3998/*
3999 * Since the lifetime of a delegation isn't limited to that of an open, a
4000 * client may quite reasonably hang on to a delegation as long as it has
4001 * the inode cached.  This becomes an obvious problem the first time a
4002 * client's inode cache approaches the size of the server's total memory.
4003 *
4004 * For now we avoid this problem by imposing a hard limit on the number
4005 * of delegations, which varies according to the server's memory size.
4006 */
4007static void
4008set_max_delegations(void)
4009{
4010        /*
4011         * Allow at most 4 delegations per megabyte of RAM.  Quick
4012         * estimates suggest that in the worst case (where every delegation
4013         * is for a different inode), a delegation could take about 1.5K,
4014         * giving a worst case usage of about 6% of memory.
4015         */
4016        max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
4017}
4018
4019/* initialization to perform when the nfsd service is started: */
4020
4021static int
4022__nfs4_state_start(void)
4023{
4024        unsigned long grace_time;
4025
4026        boot_time = get_seconds();
4027        grace_time = get_nfs4_grace_period();
4028        lease_time = user_lease_time;
4029        locks_start_grace(&nfsd4_manager);
4030        printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
4031               grace_time/HZ);
4032        laundry_wq = create_singlethread_workqueue("nfsd4");
4033        if (laundry_wq == NULL)
4034                return -ENOMEM;
4035        queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
4036        set_max_delegations();
4037        return set_callback_cred();
4038}
4039
4040int
4041nfs4_state_start(void)
4042{
4043        int ret;
4044
4045        if (nfs4_init)
4046                return 0;
4047        nfsd4_load_reboot_recovery_data();
4048        ret = __nfs4_state_start();
4049        if (ret)
4050                return ret;
4051        nfs4_init = 1;
4052        return 0;
4053}
4054
4055time_t
4056nfs4_lease_time(void)
4057{
4058        return lease_time;
4059}
4060
4061static void
4062__nfs4_state_shutdown(void)
4063{
4064        int i;
4065        struct nfs4_client *clp = NULL;
4066        struct nfs4_delegation *dp = NULL;
4067        struct list_head *pos, *next, reaplist;
4068
4069        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
4070                while (!list_empty(&conf_id_hashtbl[i])) {
4071                        clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
4072                        expire_client(clp);
4073                }
4074                while (!list_empty(&unconf_str_hashtbl[i])) {
4075                        clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
4076                        expire_client(clp);
4077                }
4078        }
4079        INIT_LIST_HEAD(&reaplist);
4080        spin_lock(&recall_lock);
4081        list_for_each_safe(pos, next, &del_recall_lru) {
4082                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
4083                list_move(&dp->dl_recall_lru, &reaplist);
4084        }
4085        spin_unlock(&recall_lock);
4086        list_for_each_safe(pos, next, &reaplist) {
4087                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
4088                list_del_init(&dp->dl_recall_lru);
4089                unhash_delegation(dp);
4090        }
4091
4092        nfsd4_shutdown_recdir();
4093        nfs4_init = 0;
4094}
4095
4096void
4097nfs4_state_shutdown(void)
4098{
4099        cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
4100        destroy_workqueue(laundry_wq);
4101        locks_end_grace(&nfsd4_manager);
4102        nfs4_lock_state();
4103        nfs4_release_reclaim();
4104        __nfs4_state_shutdown();
4105        nfs4_unlock_state();
4106}
4107
4108/*
4109 * user_recovery_dirname is protected by the nfsd_mutex since it's only
4110 * accessed when nfsd is starting.
4111 */
4112static void
4113nfs4_set_recdir(char *recdir)
4114{
4115        strcpy(user_recovery_dirname, recdir);
4116}
4117
4118/*
4119 * Change the NFSv4 recovery directory to recdir.
4120 */
4121int
4122nfs4_reset_recoverydir(char *recdir)
4123{
4124        int status;
4125        struct path path;
4126
4127        status = kern_path(recdir, LOOKUP_FOLLOW, &path);
4128        if (status)
4129                return status;
4130        status = -ENOTDIR;
4131        if (S_ISDIR(path.dentry->d_inode->i_mode)) {
4132                nfs4_set_recdir(recdir);
4133                status = 0;
4134        }
4135        path_put(&path);
4136        return status;
4137}
4138
4139char *
4140nfs4_recoverydir(void)
4141{
4142        return user_recovery_dirname;
4143}
4144
4145/*
4146 * Called when leasetime is changed.
4147 *
4148 * The only way the protocol gives us to handle on-the-fly lease changes is to
4149 * simulate a reboot.  Instead of doing that, we just wait till the next time
4150 * we start to register any changes in lease time.  If the administrator
4151 * really wants to change the lease time *now*, they can go ahead and bring
4152 * nfsd down and then back up again after changing the lease time.
4153 *
4154 * user_lease_time is protected by nfsd_mutex since it's only really accessed
4155 * when nfsd is starting
4156 */
4157void
4158nfs4_reset_lease(time_t leasetime)
4159{
4160        user_lease_time = leasetime;
4161}
4162