linux/fs/nfsd/nfs4recover.c
<<
>>
Prefs
   1/*
   2*  Copyright (c) 2004 The Regents of the University of Michigan.
   3*  Copyright (c) 2012 Jeff Layton <jlayton@redhat.com>
   4*  All rights reserved.
   5*
   6*  Andy Adamson <andros@citi.umich.edu>
   7*
   8*  Redistribution and use in source and binary forms, with or without
   9*  modification, are permitted provided that the following conditions
  10*  are met:
  11*
  12*  1. Redistributions of source code must retain the above copyright
  13*     notice, this list of conditions and the following disclaimer.
  14*  2. Redistributions in binary form must reproduce the above copyright
  15*     notice, this list of conditions and the following disclaimer in the
  16*     documentation and/or other materials provided with the distribution.
  17*  3. Neither the name of the University nor the names of its
  18*     contributors may be used to endorse or promote products derived
  19*     from this software without specific prior written permission.
  20*
  21*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  22*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  23*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  28*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  29*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  30*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  31*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32*
  33*/
  34
  35#include <crypto/hash.h>
  36#include <linux/file.h>
  37#include <linux/slab.h>
  38#include <linux/namei.h>
  39#include <linux/sched.h>
  40#include <linux/fs.h>
  41#include <linux/module.h>
  42#include <net/net_namespace.h>
  43#include <linux/sunrpc/rpc_pipe_fs.h>
  44#include <linux/sunrpc/clnt.h>
  45#include <linux/nfsd/cld.h>
  46
  47#include "nfsd.h"
  48#include "state.h"
  49#include "vfs.h"
  50#include "netns.h"
  51
  52#define NFSDDBG_FACILITY                NFSDDBG_PROC
  53
  54/* Declarations */
  55struct nfsd4_client_tracking_ops {
  56        int (*init)(struct net *);
  57        void (*exit)(struct net *);
  58        void (*create)(struct nfs4_client *);
  59        void (*remove)(struct nfs4_client *);
  60        int (*check)(struct nfs4_client *);
  61        void (*grace_done)(struct nfsd_net *);
  62};
  63
  64/* Globals */
  65static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
  66
  67static int
  68nfs4_save_creds(const struct cred **original_creds)
  69{
  70        struct cred *new;
  71
  72        new = prepare_creds();
  73        if (!new)
  74                return -ENOMEM;
  75
  76        new->fsuid = GLOBAL_ROOT_UID;
  77        new->fsgid = GLOBAL_ROOT_GID;
  78        *original_creds = override_creds(new);
  79        put_cred(new);
  80        return 0;
  81}
  82
  83static void
  84nfs4_reset_creds(const struct cred *original)
  85{
  86        revert_creds(original);
  87}
  88
  89static void
  90md5_to_hex(char *out, char *md5)
  91{
  92        int i;
  93
  94        for (i=0; i<16; i++) {
  95                unsigned char c = md5[i];
  96
  97                *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
  98                *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
  99        }
 100        *out = '\0';
 101}
 102
 103static int
 104nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
 105{
 106        struct xdr_netobj cksum;
 107        struct crypto_shash *tfm;
 108        int status;
 109
 110        dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 111                        clname->len, clname->data);
 112        tfm = crypto_alloc_shash("md5", 0, 0);
 113        if (IS_ERR(tfm)) {
 114                status = PTR_ERR(tfm);
 115                goto out_no_tfm;
 116        }
 117
 118        cksum.len = crypto_shash_digestsize(tfm);
 119        cksum.data = kmalloc(cksum.len, GFP_KERNEL);
 120        if (cksum.data == NULL) {
 121                status = -ENOMEM;
 122                goto out;
 123        }
 124
 125        {
 126                SHASH_DESC_ON_STACK(desc, tfm);
 127
 128                desc->tfm = tfm;
 129
 130                status = crypto_shash_digest(desc, clname->data, clname->len,
 131                                             cksum.data);
 132                shash_desc_zero(desc);
 133        }
 134
 135        if (status)
 136                goto out;
 137
 138        md5_to_hex(dname, cksum.data);
 139
 140        status = 0;
 141out:
 142        kfree(cksum.data);
 143        crypto_free_shash(tfm);
 144out_no_tfm:
 145        return status;
 146}
 147
 148/*
 149 * If we had an error generating the recdir name for the legacy tracker
 150 * then warn the admin. If the error doesn't appear to be transient,
 151 * then disable recovery tracking.
 152 */
 153static void
 154legacy_recdir_name_error(struct nfs4_client *clp, int error)
 155{
 156        printk(KERN_ERR "NFSD: unable to generate recoverydir "
 157                        "name (%d).\n", error);
 158
 159        /*
 160         * if the algorithm just doesn't exist, then disable the recovery
 161         * tracker altogether. The crypto libs will generally return this if
 162         * FIPS is enabled as well.
 163         */
 164        if (error == -ENOENT) {
 165                printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
 166                        "Reboot recovery will not function correctly!\n");
 167                nfsd4_client_tracking_exit(clp->net);
 168        }
 169}
 170
 171static void
 172__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
 173                const char *dname, int len, struct nfsd_net *nn)
 174{
 175        struct xdr_netobj name;
 176        struct nfs4_client_reclaim *crp;
 177
 178        name.data = kmemdup(dname, len, GFP_KERNEL);
 179        if (!name.data) {
 180                dprintk("%s: failed to allocate memory for name.data!\n",
 181                        __func__);
 182                return;
 183        }
 184        name.len = len;
 185        crp = nfs4_client_to_reclaim(name, nn);
 186        if (!crp) {
 187                kfree(name.data);
 188                return;
 189        }
 190        crp->cr_clp = clp;
 191}
 192
 193static void
 194nfsd4_create_clid_dir(struct nfs4_client *clp)
 195{
 196        const struct cred *original_cred;
 197        char dname[HEXDIR_LEN];
 198        struct dentry *dir, *dentry;
 199        int status;
 200        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 201
 202        if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 203                return;
 204        if (!nn->rec_file)
 205                return;
 206
 207        status = nfs4_make_rec_clidname(dname, &clp->cl_name);
 208        if (status)
 209                return legacy_recdir_name_error(clp, status);
 210
 211        status = nfs4_save_creds(&original_cred);
 212        if (status < 0)
 213                return;
 214
 215        status = mnt_want_write_file(nn->rec_file);
 216        if (status)
 217                goto out_creds;
 218
 219        dir = nn->rec_file->f_path.dentry;
 220        /* lock the parent */
 221        inode_lock(d_inode(dir));
 222
 223        dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
 224        if (IS_ERR(dentry)) {
 225                status = PTR_ERR(dentry);
 226                goto out_unlock;
 227        }
 228        if (d_really_is_positive(dentry))
 229                /*
 230                 * In the 4.1 case, where we're called from
 231                 * reclaim_complete(), records from the previous reboot
 232                 * may still be left, so this is OK.
 233                 *
 234                 * In the 4.0 case, we should never get here; but we may
 235                 * as well be forgiving and just succeed silently.
 236                 */
 237                goto out_put;
 238        status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
 239out_put:
 240        dput(dentry);
 241out_unlock:
 242        inode_unlock(d_inode(dir));
 243        if (status == 0) {
 244                if (nn->in_grace)
 245                        __nfsd4_create_reclaim_record_grace(clp, dname,
 246                                        HEXDIR_LEN, nn);
 247                vfs_fsync(nn->rec_file, 0);
 248        } else {
 249                printk(KERN_ERR "NFSD: failed to write recovery record"
 250                                " (err %d); please check that %s exists"
 251                                " and is writeable", status,
 252                                user_recovery_dirname);
 253        }
 254        mnt_drop_write_file(nn->rec_file);
 255out_creds:
 256        nfs4_reset_creds(original_cred);
 257}
 258
 259typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *);
 260
 261struct name_list {
 262        char name[HEXDIR_LEN];
 263        struct list_head list;
 264};
 265
 266struct nfs4_dir_ctx {
 267        struct dir_context ctx;
 268        struct list_head names;
 269};
 270
 271static int
 272nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
 273                loff_t offset, u64 ino, unsigned int d_type)
 274{
 275        struct nfs4_dir_ctx *ctx =
 276                container_of(__ctx, struct nfs4_dir_ctx, ctx);
 277        struct name_list *entry;
 278
 279        if (namlen != HEXDIR_LEN - 1)
 280                return 0;
 281        entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
 282        if (entry == NULL)
 283                return -ENOMEM;
 284        memcpy(entry->name, name, HEXDIR_LEN - 1);
 285        entry->name[HEXDIR_LEN - 1] = '\0';
 286        list_add(&entry->list, &ctx->names);
 287        return 0;
 288}
 289
 290static int
 291nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 292{
 293        const struct cred *original_cred;
 294        struct dentry *dir = nn->rec_file->f_path.dentry;
 295        struct nfs4_dir_ctx ctx = {
 296                .ctx.actor = nfsd4_build_namelist,
 297                .names = LIST_HEAD_INIT(ctx.names)
 298        };
 299        struct name_list *entry, *tmp;
 300        int status;
 301
 302        status = nfs4_save_creds(&original_cred);
 303        if (status < 0)
 304                return status;
 305
 306        status = vfs_llseek(nn->rec_file, 0, SEEK_SET);
 307        if (status < 0) {
 308                nfs4_reset_creds(original_cred);
 309                return status;
 310        }
 311
 312        status = iterate_dir(nn->rec_file, &ctx.ctx);
 313        inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 314
 315        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
 316                if (!status) {
 317                        struct dentry *dentry;
 318                        dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
 319                        if (IS_ERR(dentry)) {
 320                                status = PTR_ERR(dentry);
 321                                break;
 322                        }
 323                        status = f(dir, dentry, nn);
 324                        dput(dentry);
 325                }
 326                list_del(&entry->list);
 327                kfree(entry);
 328        }
 329        inode_unlock(d_inode(dir));
 330        nfs4_reset_creds(original_cred);
 331
 332        list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
 333                dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name);
 334                list_del(&entry->list);
 335                kfree(entry);
 336        }
 337        return status;
 338}
 339
 340static int
 341nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
 342{
 343        struct dentry *dir, *dentry;
 344        int status;
 345
 346        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 347
 348        dir = nn->rec_file->f_path.dentry;
 349        inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 350        dentry = lookup_one_len(name, dir, namlen);
 351        if (IS_ERR(dentry)) {
 352                status = PTR_ERR(dentry);
 353                goto out_unlock;
 354        }
 355        status = -ENOENT;
 356        if (d_really_is_negative(dentry))
 357                goto out;
 358        status = vfs_rmdir(d_inode(dir), dentry);
 359out:
 360        dput(dentry);
 361out_unlock:
 362        inode_unlock(d_inode(dir));
 363        return status;
 364}
 365
 366static void
 367__nfsd4_remove_reclaim_record_grace(const char *dname, int len,
 368                struct nfsd_net *nn)
 369{
 370        struct xdr_netobj name;
 371        struct nfs4_client_reclaim *crp;
 372
 373        name.data = kmemdup(dname, len, GFP_KERNEL);
 374        if (!name.data) {
 375                dprintk("%s: failed to allocate memory for name.data!\n",
 376                        __func__);
 377                return;
 378        }
 379        name.len = len;
 380        crp = nfsd4_find_reclaim_client(name, nn);
 381        kfree(name.data);
 382        if (crp)
 383                nfs4_remove_reclaim_record(crp, nn);
 384}
 385
 386static void
 387nfsd4_remove_clid_dir(struct nfs4_client *clp)
 388{
 389        const struct cred *original_cred;
 390        char dname[HEXDIR_LEN];
 391        int status;
 392        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 393
 394        if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 395                return;
 396
 397        status = nfs4_make_rec_clidname(dname, &clp->cl_name);
 398        if (status)
 399                return legacy_recdir_name_error(clp, status);
 400
 401        status = mnt_want_write_file(nn->rec_file);
 402        if (status)
 403                goto out;
 404        clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 405
 406        status = nfs4_save_creds(&original_cred);
 407        if (status < 0)
 408                goto out_drop_write;
 409
 410        status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
 411        nfs4_reset_creds(original_cred);
 412        if (status == 0) {
 413                vfs_fsync(nn->rec_file, 0);
 414                if (nn->in_grace)
 415                        __nfsd4_remove_reclaim_record_grace(dname,
 416                                        HEXDIR_LEN, nn);
 417        }
 418out_drop_write:
 419        mnt_drop_write_file(nn->rec_file);
 420out:
 421        if (status)
 422                printk("NFSD: Failed to remove expired client state directory"
 423                                " %.*s\n", HEXDIR_LEN, dname);
 424}
 425
 426static int
 427purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 428{
 429        int status;
 430        struct xdr_netobj name;
 431
 432        if (child->d_name.len != HEXDIR_LEN - 1) {
 433                printk("%s: illegal name %pd in recovery directory\n",
 434                                __func__, child);
 435                /* Keep trying; maybe the others are OK: */
 436                return 0;
 437        }
 438        name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
 439        if (!name.data) {
 440                dprintk("%s: failed to allocate memory for name.data!\n",
 441                        __func__);
 442                goto out;
 443        }
 444        name.len = HEXDIR_LEN;
 445        if (nfs4_has_reclaimed_state(name, nn))
 446                goto out_free;
 447
 448        status = vfs_rmdir(d_inode(parent), child);
 449        if (status)
 450                printk("failed to remove client recovery directory %pd\n",
 451                                child);
 452out_free:
 453        kfree(name.data);
 454out:
 455        /* Keep trying, success or failure: */
 456        return 0;
 457}
 458
 459static void
 460nfsd4_recdir_purge_old(struct nfsd_net *nn)
 461{
 462        int status;
 463
 464        nn->in_grace = false;
 465        if (!nn->rec_file)
 466                return;
 467        status = mnt_want_write_file(nn->rec_file);
 468        if (status)
 469                goto out;
 470        status = nfsd4_list_rec_dir(purge_old, nn);
 471        if (status == 0)
 472                vfs_fsync(nn->rec_file, 0);
 473        mnt_drop_write_file(nn->rec_file);
 474out:
 475        nfs4_release_reclaim(nn);
 476        if (status)
 477                printk("nfsd4: failed to purge old clients from recovery"
 478                        " directory %pD\n", nn->rec_file);
 479}
 480
 481static int
 482load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 483{
 484        struct xdr_netobj name;
 485
 486        if (child->d_name.len != HEXDIR_LEN - 1) {
 487                printk("%s: illegal name %pd in recovery directory\n",
 488                                __func__, child);
 489                /* Keep trying; maybe the others are OK: */
 490                return 0;
 491        }
 492        name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
 493        if (!name.data) {
 494                dprintk("%s: failed to allocate memory for name.data!\n",
 495                        __func__);
 496                goto out;
 497        }
 498        name.len = HEXDIR_LEN;
 499        if (!nfs4_client_to_reclaim(name, nn))
 500                kfree(name.data);
 501out:
 502        return 0;
 503}
 504
 505static int
 506nfsd4_recdir_load(struct net *net) {
 507        int status;
 508        struct nfsd_net *nn =  net_generic(net, nfsd_net_id);
 509
 510        if (!nn->rec_file)
 511                return 0;
 512
 513        status = nfsd4_list_rec_dir(load_recdir, nn);
 514        if (status)
 515                printk("nfsd4: failed loading clients from recovery"
 516                        " directory %pD\n", nn->rec_file);
 517        return status;
 518}
 519
 520/*
 521 * Hold reference to the recovery directory.
 522 */
 523
 524static int
 525nfsd4_init_recdir(struct net *net)
 526{
 527        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 528        const struct cred *original_cred;
 529        int status;
 530
 531        printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
 532                        user_recovery_dirname);
 533
 534        BUG_ON(nn->rec_file);
 535
 536        status = nfs4_save_creds(&original_cred);
 537        if (status < 0) {
 538                printk("NFSD: Unable to change credentials to find recovery"
 539                       " directory: error %d\n",
 540                       status);
 541                return status;
 542        }
 543
 544        nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
 545        if (IS_ERR(nn->rec_file)) {
 546                printk("NFSD: unable to find recovery directory %s\n",
 547                                user_recovery_dirname);
 548                status = PTR_ERR(nn->rec_file);
 549                nn->rec_file = NULL;
 550        }
 551
 552        nfs4_reset_creds(original_cred);
 553        if (!status)
 554                nn->in_grace = true;
 555        return status;
 556}
 557
 558static void
 559nfsd4_shutdown_recdir(struct net *net)
 560{
 561        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 562
 563        if (!nn->rec_file)
 564                return;
 565        fput(nn->rec_file);
 566        nn->rec_file = NULL;
 567}
 568
 569static int
 570nfs4_legacy_state_init(struct net *net)
 571{
 572        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 573        int i;
 574
 575        nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
 576                                                sizeof(struct list_head),
 577                                                GFP_KERNEL);
 578        if (!nn->reclaim_str_hashtbl)
 579                return -ENOMEM;
 580
 581        for (i = 0; i < CLIENT_HASH_SIZE; i++)
 582                INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
 583        nn->reclaim_str_hashtbl_size = 0;
 584
 585        return 0;
 586}
 587
 588static void
 589nfs4_legacy_state_shutdown(struct net *net)
 590{
 591        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 592
 593        kfree(nn->reclaim_str_hashtbl);
 594}
 595
 596static int
 597nfsd4_load_reboot_recovery_data(struct net *net)
 598{
 599        int status;
 600
 601        status = nfsd4_init_recdir(net);
 602        if (status)
 603                return status;
 604
 605        status = nfsd4_recdir_load(net);
 606        if (status)
 607                nfsd4_shutdown_recdir(net);
 608
 609        return status;
 610}
 611
 612static int
 613nfsd4_legacy_tracking_init(struct net *net)
 614{
 615        int status;
 616
 617        /* XXX: The legacy code won't work in a container */
 618        if (net != &init_net) {
 619                pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n");
 620                return -EINVAL;
 621        }
 622
 623        status = nfs4_legacy_state_init(net);
 624        if (status)
 625                return status;
 626
 627        status = nfsd4_load_reboot_recovery_data(net);
 628        if (status)
 629                goto err;
 630        printk("NFSD: Using legacy client tracking operations.\n");
 631        return 0;
 632
 633err:
 634        nfs4_legacy_state_shutdown(net);
 635        return status;
 636}
 637
 638static void
 639nfsd4_legacy_tracking_exit(struct net *net)
 640{
 641        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 642
 643        nfs4_release_reclaim(nn);
 644        nfsd4_shutdown_recdir(net);
 645        nfs4_legacy_state_shutdown(net);
 646}
 647
 648/*
 649 * Change the NFSv4 recovery directory to recdir.
 650 */
 651int
 652nfs4_reset_recoverydir(char *recdir)
 653{
 654        int status;
 655        struct path path;
 656
 657        status = kern_path(recdir, LOOKUP_FOLLOW, &path);
 658        if (status)
 659                return status;
 660        status = -ENOTDIR;
 661        if (d_is_dir(path.dentry)) {
 662                strcpy(user_recovery_dirname, recdir);
 663                status = 0;
 664        }
 665        path_put(&path);
 666        return status;
 667}
 668
 669char *
 670nfs4_recoverydir(void)
 671{
 672        return user_recovery_dirname;
 673}
 674
 675static int
 676nfsd4_check_legacy_client(struct nfs4_client *clp)
 677{
 678        int status;
 679        char dname[HEXDIR_LEN];
 680        struct nfs4_client_reclaim *crp;
 681        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 682        struct xdr_netobj name;
 683
 684        /* did we already find that this client is stable? */
 685        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 686                return 0;
 687
 688        status = nfs4_make_rec_clidname(dname, &clp->cl_name);
 689        if (status) {
 690                legacy_recdir_name_error(clp, status);
 691                return status;
 692        }
 693
 694        /* look for it in the reclaim hashtable otherwise */
 695        name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
 696        if (!name.data) {
 697                dprintk("%s: failed to allocate memory for name.data!\n",
 698                        __func__);
 699                goto out_enoent;
 700        }
 701        name.len = HEXDIR_LEN;
 702        crp = nfsd4_find_reclaim_client(name, nn);
 703        kfree(name.data);
 704        if (crp) {
 705                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 706                crp->cr_clp = clp;
 707                return 0;
 708        }
 709
 710out_enoent:
 711        return -ENOENT;
 712}
 713
 714static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
 715        .init           = nfsd4_legacy_tracking_init,
 716        .exit           = nfsd4_legacy_tracking_exit,
 717        .create         = nfsd4_create_clid_dir,
 718        .remove         = nfsd4_remove_clid_dir,
 719        .check          = nfsd4_check_legacy_client,
 720        .grace_done     = nfsd4_recdir_purge_old,
 721};
 722
 723/* Globals */
 724#define NFSD_PIPE_DIR           "nfsd"
 725#define NFSD_CLD_PIPE           "cld"
 726
 727/* per-net-ns structure for holding cld upcall info */
 728struct cld_net {
 729        struct rpc_pipe         *cn_pipe;
 730        spinlock_t               cn_lock;
 731        struct list_head         cn_list;
 732        unsigned int             cn_xid;
 733        bool                     cn_has_legacy;
 734};
 735
 736struct cld_upcall {
 737        struct list_head         cu_list;
 738        struct cld_net          *cu_net;
 739        struct completion        cu_done;
 740        struct cld_msg           cu_msg;
 741};
 742
 743static int
 744__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
 745{
 746        int ret;
 747        struct rpc_pipe_msg msg;
 748        struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
 749
 750        memset(&msg, 0, sizeof(msg));
 751        msg.data = cmsg;
 752        msg.len = sizeof(*cmsg);
 753
 754        ret = rpc_queue_upcall(pipe, &msg);
 755        if (ret < 0) {
 756                goto out;
 757        }
 758
 759        wait_for_completion(&cup->cu_done);
 760
 761        if (msg.errno < 0)
 762                ret = msg.errno;
 763out:
 764        return ret;
 765}
 766
 767static int
 768cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
 769{
 770        int ret;
 771
 772        /*
 773         * -EAGAIN occurs when pipe is closed and reopened while there are
 774         *  upcalls queued.
 775         */
 776        do {
 777                ret = __cld_pipe_upcall(pipe, cmsg);
 778        } while (ret == -EAGAIN);
 779
 780        return ret;
 781}
 782
 783static ssize_t
 784__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
 785                struct nfsd_net *nn)
 786{
 787        uint8_t cmd;
 788        struct xdr_netobj name;
 789        uint16_t namelen;
 790        struct cld_net *cn = nn->cld_net;
 791
 792        if (get_user(cmd, &cmsg->cm_cmd)) {
 793                dprintk("%s: error when copying cmd from userspace", __func__);
 794                return -EFAULT;
 795        }
 796        if (cmd == Cld_GraceStart) {
 797                if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
 798                        return -EFAULT;
 799                name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
 800                if (IS_ERR_OR_NULL(name.data))
 801                        return -EFAULT;
 802                name.len = namelen;
 803                if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
 804                        name.len = name.len - 5;
 805                        memmove(name.data, name.data + 5, name.len);
 806                        cn->cn_has_legacy = true;
 807                }
 808                if (!nfs4_client_to_reclaim(name, nn)) {
 809                        kfree(name.data);
 810                        return -EFAULT;
 811                }
 812                return sizeof(*cmsg);
 813        }
 814        return -EFAULT;
 815}
 816
 817static ssize_t
 818cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 819{
 820        struct cld_upcall *tmp, *cup;
 821        struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
 822        uint32_t xid;
 823        struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
 824                                                nfsd_net_id);
 825        struct cld_net *cn = nn->cld_net;
 826        int16_t status;
 827
 828        if (mlen != sizeof(*cmsg)) {
 829                dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
 830                        sizeof(*cmsg));
 831                return -EINVAL;
 832        }
 833
 834        /* copy just the xid so we can try to find that */
 835        if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
 836                dprintk("%s: error when copying xid from userspace", __func__);
 837                return -EFAULT;
 838        }
 839
 840        /*
 841         * copy the status so we know whether to remove the upcall from the
 842         * list (for -EINPROGRESS, we just want to make sure the xid is
 843         * valid, not remove the upcall from the list)
 844         */
 845        if (get_user(status, &cmsg->cm_status)) {
 846                dprintk("%s: error when copying status from userspace", __func__);
 847                return -EFAULT;
 848        }
 849
 850        /* walk the list and find corresponding xid */
 851        cup = NULL;
 852        spin_lock(&cn->cn_lock);
 853        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
 854                if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
 855                        cup = tmp;
 856                        if (status != -EINPROGRESS)
 857                                list_del_init(&cup->cu_list);
 858                        break;
 859                }
 860        }
 861        spin_unlock(&cn->cn_lock);
 862
 863        /* couldn't find upcall? */
 864        if (!cup) {
 865                dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid);
 866                return -EINVAL;
 867        }
 868
 869        if (status == -EINPROGRESS)
 870                return __cld_pipe_inprogress_downcall(cmsg, nn);
 871
 872        if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
 873                return -EFAULT;
 874
 875        complete(&cup->cu_done);
 876        return mlen;
 877}
 878
 879static void
 880cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 881{
 882        struct cld_msg *cmsg = msg->data;
 883        struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
 884                                                 cu_msg);
 885
 886        /* errno >= 0 means we got a downcall */
 887        if (msg->errno >= 0)
 888                return;
 889
 890        complete(&cup->cu_done);
 891}
 892
 893static const struct rpc_pipe_ops cld_upcall_ops = {
 894        .upcall         = rpc_pipe_generic_upcall,
 895        .downcall       = cld_pipe_downcall,
 896        .destroy_msg    = cld_pipe_destroy_msg,
 897};
 898
 899static struct dentry *
 900nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
 901{
 902        struct dentry *dir, *dentry;
 903
 904        dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
 905        if (dir == NULL)
 906                return ERR_PTR(-ENOENT);
 907        dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
 908        dput(dir);
 909        return dentry;
 910}
 911
 912static void
 913nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
 914{
 915        if (pipe->dentry)
 916                rpc_unlink(pipe->dentry);
 917}
 918
 919static struct dentry *
 920nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
 921{
 922        struct super_block *sb;
 923        struct dentry *dentry;
 924
 925        sb = rpc_get_sb_net(net);
 926        if (!sb)
 927                return NULL;
 928        dentry = nfsd4_cld_register_sb(sb, pipe);
 929        rpc_put_sb_net(net);
 930        return dentry;
 931}
 932
 933static void
 934nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
 935{
 936        struct super_block *sb;
 937
 938        sb = rpc_get_sb_net(net);
 939        if (sb) {
 940                nfsd4_cld_unregister_sb(pipe);
 941                rpc_put_sb_net(net);
 942        }
 943}
 944
 945/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
 946static int
 947__nfsd4_init_cld_pipe(struct net *net)
 948{
 949        int ret;
 950        struct dentry *dentry;
 951        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 952        struct cld_net *cn;
 953
 954        if (nn->cld_net)
 955                return 0;
 956
 957        cn = kzalloc(sizeof(*cn), GFP_KERNEL);
 958        if (!cn) {
 959                ret = -ENOMEM;
 960                goto err;
 961        }
 962
 963        cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
 964        if (IS_ERR(cn->cn_pipe)) {
 965                ret = PTR_ERR(cn->cn_pipe);
 966                goto err;
 967        }
 968        spin_lock_init(&cn->cn_lock);
 969        INIT_LIST_HEAD(&cn->cn_list);
 970
 971        dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
 972        if (IS_ERR(dentry)) {
 973                ret = PTR_ERR(dentry);
 974                goto err_destroy_data;
 975        }
 976
 977        cn->cn_pipe->dentry = dentry;
 978        cn->cn_has_legacy = false;
 979        nn->cld_net = cn;
 980        return 0;
 981
 982err_destroy_data:
 983        rpc_destroy_pipe_data(cn->cn_pipe);
 984err:
 985        kfree(cn);
 986        printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
 987                        ret);
 988        return ret;
 989}
 990
 991static int
 992nfsd4_init_cld_pipe(struct net *net)
 993{
 994        int status;
 995
 996        status = __nfsd4_init_cld_pipe(net);
 997        if (!status)
 998                printk("NFSD: Using old nfsdcld client tracking operations.\n");
 999        return status;
1000}
1001
1002static void
1003nfsd4_remove_cld_pipe(struct net *net)
1004{
1005        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1006        struct cld_net *cn = nn->cld_net;
1007
1008        nfsd4_cld_unregister_net(net, cn->cn_pipe);
1009        rpc_destroy_pipe_data(cn->cn_pipe);
1010        kfree(nn->cld_net);
1011        nn->cld_net = NULL;
1012}
1013
1014static struct cld_upcall *
1015alloc_cld_upcall(struct cld_net *cn)
1016{
1017        struct cld_upcall *new, *tmp;
1018
1019        new = kzalloc(sizeof(*new), GFP_KERNEL);
1020        if (!new)
1021                return new;
1022
1023        /* FIXME: hard cap on number in flight? */
1024restart_search:
1025        spin_lock(&cn->cn_lock);
1026        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
1027                if (tmp->cu_msg.cm_xid == cn->cn_xid) {
1028                        cn->cn_xid++;
1029                        spin_unlock(&cn->cn_lock);
1030                        goto restart_search;
1031                }
1032        }
1033        init_completion(&new->cu_done);
1034        new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
1035        put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
1036        new->cu_net = cn;
1037        list_add(&new->cu_list, &cn->cn_list);
1038        spin_unlock(&cn->cn_lock);
1039
1040        dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
1041
1042        return new;
1043}
1044
1045static void
1046free_cld_upcall(struct cld_upcall *victim)
1047{
1048        struct cld_net *cn = victim->cu_net;
1049
1050        spin_lock(&cn->cn_lock);
1051        list_del(&victim->cu_list);
1052        spin_unlock(&cn->cn_lock);
1053        kfree(victim);
1054}
1055
1056/* Ask daemon to create a new record */
1057static void
1058nfsd4_cld_create(struct nfs4_client *clp)
1059{
1060        int ret;
1061        struct cld_upcall *cup;
1062        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1063        struct cld_net *cn = nn->cld_net;
1064
1065        /* Don't upcall if it's already stored */
1066        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1067                return;
1068
1069        cup = alloc_cld_upcall(cn);
1070        if (!cup) {
1071                ret = -ENOMEM;
1072                goto out_err;
1073        }
1074
1075        cup->cu_msg.cm_cmd = Cld_Create;
1076        cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1077        memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1078                        clp->cl_name.len);
1079
1080        ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1081        if (!ret) {
1082                ret = cup->cu_msg.cm_status;
1083                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1084        }
1085
1086        free_cld_upcall(cup);
1087out_err:
1088        if (ret)
1089                printk(KERN_ERR "NFSD: Unable to create client "
1090                                "record on stable storage: %d\n", ret);
1091}
1092
1093/* Ask daemon to create a new record */
1094static void
1095nfsd4_cld_remove(struct nfs4_client *clp)
1096{
1097        int ret;
1098        struct cld_upcall *cup;
1099        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1100        struct cld_net *cn = nn->cld_net;
1101
1102        /* Don't upcall if it's already removed */
1103        if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1104                return;
1105
1106        cup = alloc_cld_upcall(cn);
1107        if (!cup) {
1108                ret = -ENOMEM;
1109                goto out_err;
1110        }
1111
1112        cup->cu_msg.cm_cmd = Cld_Remove;
1113        cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1114        memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1115                        clp->cl_name.len);
1116
1117        ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1118        if (!ret) {
1119                ret = cup->cu_msg.cm_status;
1120                clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1121        }
1122
1123        free_cld_upcall(cup);
1124out_err:
1125        if (ret)
1126                printk(KERN_ERR "NFSD: Unable to remove client "
1127                                "record from stable storage: %d\n", ret);
1128}
1129
1130/*
1131 * For older nfsdcld's that do not allow us to "slurp" the clients
1132 * from the tracking database during startup.
1133 *
1134 * Check for presence of a record, and update its timestamp
1135 */
1136static int
1137nfsd4_cld_check_v0(struct nfs4_client *clp)
1138{
1139        int ret;
1140        struct cld_upcall *cup;
1141        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1142        struct cld_net *cn = nn->cld_net;
1143
1144        /* Don't upcall if one was already stored during this grace pd */
1145        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1146                return 0;
1147
1148        cup = alloc_cld_upcall(cn);
1149        if (!cup) {
1150                printk(KERN_ERR "NFSD: Unable to check client record on "
1151                                "stable storage: %d\n", -ENOMEM);
1152                return -ENOMEM;
1153        }
1154
1155        cup->cu_msg.cm_cmd = Cld_Check;
1156        cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1157        memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1158                        clp->cl_name.len);
1159
1160        ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1161        if (!ret) {
1162                ret = cup->cu_msg.cm_status;
1163                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1164        }
1165
1166        free_cld_upcall(cup);
1167        return ret;
1168}
1169
1170/*
1171 * For newer nfsdcld's that allow us to "slurp" the clients
1172 * from the tracking database during startup.
1173 *
1174 * Check for presence of a record in the reclaim_str_hashtbl
1175 */
1176static int
1177nfsd4_cld_check(struct nfs4_client *clp)
1178{
1179        struct nfs4_client_reclaim *crp;
1180        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1181        struct cld_net *cn = nn->cld_net;
1182        int status;
1183        char dname[HEXDIR_LEN];
1184        struct xdr_netobj name;
1185
1186        /* did we already find that this client is stable? */
1187        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1188                return 0;
1189
1190        /* look for it in the reclaim hashtable otherwise */
1191        crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
1192        if (crp)
1193                goto found;
1194
1195        if (cn->cn_has_legacy) {
1196                status = nfs4_make_rec_clidname(dname, &clp->cl_name);
1197                if (status)
1198                        return -ENOENT;
1199
1200                name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
1201                if (!name.data) {
1202                        dprintk("%s: failed to allocate memory for name.data!\n",
1203                                __func__);
1204                        return -ENOENT;
1205                }
1206                name.len = HEXDIR_LEN;
1207                crp = nfsd4_find_reclaim_client(name, nn);
1208                kfree(name.data);
1209                if (crp)
1210                        goto found;
1211
1212        }
1213        return -ENOENT;
1214found:
1215        crp->cr_clp = clp;
1216        return 0;
1217}
1218
1219static int
1220nfsd4_cld_grace_start(struct nfsd_net *nn)
1221{
1222        int ret;
1223        struct cld_upcall *cup;
1224        struct cld_net *cn = nn->cld_net;
1225
1226        cup = alloc_cld_upcall(cn);
1227        if (!cup) {
1228                ret = -ENOMEM;
1229                goto out_err;
1230        }
1231
1232        cup->cu_msg.cm_cmd = Cld_GraceStart;
1233        ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1234        if (!ret)
1235                ret = cup->cu_msg.cm_status;
1236
1237        free_cld_upcall(cup);
1238out_err:
1239        if (ret)
1240                dprintk("%s: Unable to get clients from userspace: %d\n",
1241                        __func__, ret);
1242        return ret;
1243}
1244
1245/* For older nfsdcld's that need cm_gracetime */
1246static void
1247nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
1248{
1249        int ret;
1250        struct cld_upcall *cup;
1251        struct cld_net *cn = nn->cld_net;
1252
1253        cup = alloc_cld_upcall(cn);
1254        if (!cup) {
1255                ret = -ENOMEM;
1256                goto out_err;
1257        }
1258
1259        cup->cu_msg.cm_cmd = Cld_GraceDone;
1260        cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
1261        ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1262        if (!ret)
1263                ret = cup->cu_msg.cm_status;
1264
1265        free_cld_upcall(cup);
1266out_err:
1267        if (ret)
1268                printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
1269}
1270
1271/*
1272 * For newer nfsdcld's that do not need cm_gracetime.  We also need to call
1273 * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl.
1274 */
1275static void
1276nfsd4_cld_grace_done(struct nfsd_net *nn)
1277{
1278        int ret;
1279        struct cld_upcall *cup;
1280        struct cld_net *cn = nn->cld_net;
1281
1282        cup = alloc_cld_upcall(cn);
1283        if (!cup) {
1284                ret = -ENOMEM;
1285                goto out_err;
1286        }
1287
1288        cup->cu_msg.cm_cmd = Cld_GraceDone;
1289        ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1290        if (!ret)
1291                ret = cup->cu_msg.cm_status;
1292
1293        free_cld_upcall(cup);
1294out_err:
1295        nfs4_release_reclaim(nn);
1296        if (ret)
1297                printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
1298}
1299
1300static int
1301nfs4_cld_state_init(struct net *net)
1302{
1303        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1304        int i;
1305
1306        nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
1307                                                sizeof(struct list_head),
1308                                                GFP_KERNEL);
1309        if (!nn->reclaim_str_hashtbl)
1310                return -ENOMEM;
1311
1312        for (i = 0; i < CLIENT_HASH_SIZE; i++)
1313                INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
1314        nn->reclaim_str_hashtbl_size = 0;
1315        nn->track_reclaim_completes = true;
1316        atomic_set(&nn->nr_reclaim_complete, 0);
1317
1318        return 0;
1319}
1320
1321static void
1322nfs4_cld_state_shutdown(struct net *net)
1323{
1324        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1325
1326        nn->track_reclaim_completes = false;
1327        kfree(nn->reclaim_str_hashtbl);
1328}
1329
1330static bool
1331cld_running(struct nfsd_net *nn)
1332{
1333        struct cld_net *cn = nn->cld_net;
1334        struct rpc_pipe *pipe = cn->cn_pipe;
1335
1336        return pipe->nreaders || pipe->nwriters;
1337}
1338
1339static int
1340nfsd4_cld_tracking_init(struct net *net)
1341{
1342        int status;
1343        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1344        bool running;
1345        int retries = 10;
1346
1347        status = nfs4_cld_state_init(net);
1348        if (status)
1349                return status;
1350
1351        status = __nfsd4_init_cld_pipe(net);
1352        if (status)
1353                goto err_shutdown;
1354
1355        /*
1356         * rpc pipe upcalls take 30 seconds to time out, so we don't want to
1357         * queue an upcall unless we know that nfsdcld is running (because we
1358         * want this to fail fast so that nfsd4_client_tracking_init() can try
1359         * the next client tracking method).  nfsdcld should already be running
1360         * before nfsd is started, so the wait here is for nfsdcld to open the
1361         * pipefs file we just created.
1362         */
1363        while (!(running = cld_running(nn)) && retries--)
1364                msleep(100);
1365
1366        if (!running) {
1367                status = -ETIMEDOUT;
1368                goto err_remove;
1369        }
1370
1371        status = nfsd4_cld_grace_start(nn);
1372        if (status) {
1373                if (status == -EOPNOTSUPP)
1374                        printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
1375                nfs4_release_reclaim(nn);
1376                goto err_remove;
1377        } else
1378                printk("NFSD: Using nfsdcld client tracking operations.\n");
1379        return 0;
1380
1381err_remove:
1382        nfsd4_remove_cld_pipe(net);
1383err_shutdown:
1384        nfs4_cld_state_shutdown(net);
1385        return status;
1386}
1387
1388static void
1389nfsd4_cld_tracking_exit(struct net *net)
1390{
1391        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1392
1393        nfs4_release_reclaim(nn);
1394        nfsd4_remove_cld_pipe(net);
1395        nfs4_cld_state_shutdown(net);
1396}
1397
1398/* For older nfsdcld's */
1399static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
1400        .init           = nfsd4_init_cld_pipe,
1401        .exit           = nfsd4_remove_cld_pipe,
1402        .create         = nfsd4_cld_create,
1403        .remove         = nfsd4_cld_remove,
1404        .check          = nfsd4_cld_check_v0,
1405        .grace_done     = nfsd4_cld_grace_done_v0,
1406};
1407
1408/* For newer nfsdcld's */
1409static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
1410        .init           = nfsd4_cld_tracking_init,
1411        .exit           = nfsd4_cld_tracking_exit,
1412        .create         = nfsd4_cld_create,
1413        .remove         = nfsd4_cld_remove,
1414        .check          = nfsd4_cld_check,
1415        .grace_done     = nfsd4_cld_grace_done,
1416};
1417
1418/* upcall via usermodehelper */
1419static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
1420module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
1421                        S_IRUGO|S_IWUSR);
1422MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program");
1423
1424static bool cltrack_legacy_disable;
1425module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR);
1426MODULE_PARM_DESC(cltrack_legacy_disable,
1427                "Disable legacy recoverydir conversion. Default: false");
1428
1429#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
1430#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
1431#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
1432#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
1433
1434static char *
1435nfsd4_cltrack_legacy_topdir(void)
1436{
1437        int copied;
1438        size_t len;
1439        char *result;
1440
1441        if (cltrack_legacy_disable)
1442                return NULL;
1443
1444        len = strlen(LEGACY_TOPDIR_ENV_PREFIX) +
1445                strlen(nfs4_recoverydir()) + 1;
1446
1447        result = kmalloc(len, GFP_KERNEL);
1448        if (!result)
1449                return result;
1450
1451        copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s",
1452                                nfs4_recoverydir());
1453        if (copied >= len) {
1454                /* just return nothing if output was truncated */
1455                kfree(result);
1456                return NULL;
1457        }
1458
1459        return result;
1460}
1461
1462static char *
1463nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
1464{
1465        int copied;
1466        size_t len;
1467        char *result;
1468
1469        if (cltrack_legacy_disable)
1470                return NULL;
1471
1472        /* +1 is for '/' between "topdir" and "recdir" */
1473        len = strlen(LEGACY_RECDIR_ENV_PREFIX) +
1474                strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN;
1475
1476        result = kmalloc(len, GFP_KERNEL);
1477        if (!result)
1478                return result;
1479
1480        copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/",
1481                                nfs4_recoverydir());
1482        if (copied > (len - HEXDIR_LEN)) {
1483                /* just return nothing if output will be truncated */
1484                kfree(result);
1485                return NULL;
1486        }
1487
1488        copied = nfs4_make_rec_clidname(result + copied, name);
1489        if (copied) {
1490                kfree(result);
1491                return NULL;
1492        }
1493
1494        return result;
1495}
1496
1497static char *
1498nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
1499{
1500        int copied;
1501        size_t len;
1502        char *result;
1503
1504        /* prefix + Y/N character + terminating NULL */
1505        len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
1506
1507        result = kmalloc(len, GFP_KERNEL);
1508        if (!result)
1509                return result;
1510
1511        copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
1512                                clp->cl_minorversion ? 'Y' : 'N');
1513        if (copied >= len) {
1514                /* just return nothing if output was truncated */
1515                kfree(result);
1516                return NULL;
1517        }
1518
1519        return result;
1520}
1521
1522static char *
1523nfsd4_cltrack_grace_start(time_t grace_start)
1524{
1525        int copied;
1526        size_t len;
1527        char *result;
1528
1529        /* prefix + max width of int64_t string + terminating NULL */
1530        len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1;
1531
1532        result = kmalloc(len, GFP_KERNEL);
1533        if (!result)
1534                return result;
1535
1536        copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
1537                                grace_start);
1538        if (copied >= len) {
1539                /* just return nothing if output was truncated */
1540                kfree(result);
1541                return NULL;
1542        }
1543
1544        return result;
1545}
1546
1547static int
1548nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
1549{
1550        char *envp[3];
1551        char *argv[4];
1552        int ret;
1553
1554        if (unlikely(!cltrack_prog[0])) {
1555                dprintk("%s: cltrack_prog is disabled\n", __func__);
1556                return -EACCES;
1557        }
1558
1559        dprintk("%s: cmd: %s\n", __func__, cmd);
1560        dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
1561        dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
1562        dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
1563
1564        envp[0] = env0;
1565        envp[1] = env1;
1566        envp[2] = NULL;
1567
1568        argv[0] = (char *)cltrack_prog;
1569        argv[1] = cmd;
1570        argv[2] = arg;
1571        argv[3] = NULL;
1572
1573        ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1574        /*
1575         * Disable the upcall mechanism if we're getting an ENOENT or EACCES
1576         * error. The admin can re-enable it on the fly by using sysfs
1577         * once the problem has been fixed.
1578         */
1579        if (ret == -ENOENT || ret == -EACCES) {
1580                dprintk("NFSD: %s was not found or isn't executable (%d). "
1581                        "Setting cltrack_prog to blank string!",
1582                        cltrack_prog, ret);
1583                cltrack_prog[0] = '\0';
1584        }
1585        dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret);
1586
1587        return ret;
1588}
1589
1590static char *
1591bin_to_hex_dup(const unsigned char *src, int srclen)
1592{
1593        int i;
1594        char *buf, *hex;
1595
1596        /* +1 for terminating NULL */
1597        buf = kmalloc((srclen * 2) + 1, GFP_KERNEL);
1598        if (!buf)
1599                return buf;
1600
1601        hex = buf;
1602        for (i = 0; i < srclen; i++) {
1603                sprintf(hex, "%2.2x", *src++);
1604                hex += 2;
1605        }
1606        return buf;
1607}
1608
1609static int
1610nfsd4_umh_cltrack_init(struct net *net)
1611{
1612        int ret;
1613        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1614        char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1615
1616        /* XXX: The usermode helper s not working in container yet. */
1617        if (net != &init_net) {
1618                pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
1619                kfree(grace_start);
1620                return -EINVAL;
1621        }
1622
1623        ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
1624        kfree(grace_start);
1625        if (!ret)
1626                printk("NFSD: Using UMH upcall client tracking operations.\n");
1627        return ret;
1628}
1629
1630static void
1631nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
1632{
1633        wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK,
1634                         TASK_UNINTERRUPTIBLE);
1635}
1636
1637static void
1638nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
1639{
1640        smp_mb__before_atomic();
1641        clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
1642        smp_mb__after_atomic();
1643        wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
1644}
1645
1646static void
1647nfsd4_umh_cltrack_create(struct nfs4_client *clp)
1648{
1649        char *hexid, *has_session, *grace_start;
1650        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1651
1652        /*
1653         * With v4.0 clients, there's little difference in outcome between a
1654         * create and check operation, and we can end up calling into this
1655         * function multiple times per client (once for each openowner). So,
1656         * for v4.0 clients skip upcalling once the client has been recorded
1657         * on stable storage.
1658         *
1659         * For v4.1+ clients, the outcome of the two operations is different,
1660         * so we must ensure that we upcall for the create operation. v4.1+
1661         * clients call this on RECLAIM_COMPLETE though, so we should only end
1662         * up doing a single create upcall per client.
1663         */
1664        if (clp->cl_minorversion == 0 &&
1665            test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1666                return;
1667
1668        hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1669        if (!hexid) {
1670                dprintk("%s: can't allocate memory for upcall!\n", __func__);
1671                return;
1672        }
1673
1674        has_session = nfsd4_cltrack_client_has_session(clp);
1675        grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1676
1677        nfsd4_cltrack_upcall_lock(clp);
1678        if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
1679                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1680        nfsd4_cltrack_upcall_unlock(clp);
1681
1682        kfree(has_session);
1683        kfree(grace_start);
1684        kfree(hexid);
1685}
1686
1687static void
1688nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
1689{
1690        char *hexid;
1691
1692        if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1693                return;
1694
1695        hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1696        if (!hexid) {
1697                dprintk("%s: can't allocate memory for upcall!\n", __func__);
1698                return;
1699        }
1700
1701        nfsd4_cltrack_upcall_lock(clp);
1702        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
1703            nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
1704                clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1705        nfsd4_cltrack_upcall_unlock(clp);
1706
1707        kfree(hexid);
1708}
1709
1710static int
1711nfsd4_umh_cltrack_check(struct nfs4_client *clp)
1712{
1713        int ret;
1714        char *hexid, *has_session, *legacy;
1715
1716        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1717                return 0;
1718
1719        hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1720        if (!hexid) {
1721                dprintk("%s: can't allocate memory for upcall!\n", __func__);
1722                return -ENOMEM;
1723        }
1724
1725        has_session = nfsd4_cltrack_client_has_session(clp);
1726        legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
1727
1728        nfsd4_cltrack_upcall_lock(clp);
1729        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
1730                ret = 0;
1731        } else {
1732                ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
1733                if (ret == 0)
1734                        set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1735        }
1736        nfsd4_cltrack_upcall_unlock(clp);
1737        kfree(has_session);
1738        kfree(legacy);
1739        kfree(hexid);
1740
1741        return ret;
1742}
1743
1744static void
1745nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
1746{
1747        char *legacy;
1748        char timestr[22]; /* FIXME: better way to determine max size? */
1749
1750        sprintf(timestr, "%ld", nn->boot_time);
1751        legacy = nfsd4_cltrack_legacy_topdir();
1752        nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
1753        kfree(legacy);
1754}
1755
1756static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
1757        .init           = nfsd4_umh_cltrack_init,
1758        .exit           = NULL,
1759        .create         = nfsd4_umh_cltrack_create,
1760        .remove         = nfsd4_umh_cltrack_remove,
1761        .check          = nfsd4_umh_cltrack_check,
1762        .grace_done     = nfsd4_umh_cltrack_grace_done,
1763};
1764
1765int
1766nfsd4_client_tracking_init(struct net *net)
1767{
1768        int status;
1769        struct path path;
1770        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1771
1772        /* just run the init if it the method is already decided */
1773        if (nn->client_tracking_ops)
1774                goto do_init;
1775
1776        /* First, try to use nfsdcld */
1777        nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
1778        status = nn->client_tracking_ops->init(net);
1779        if (!status)
1780                return status;
1781        if (status != -ETIMEDOUT) {
1782                nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
1783                status = nn->client_tracking_ops->init(net);
1784                if (!status)
1785                        return status;
1786        }
1787
1788        /*
1789         * Next, try the UMH upcall.
1790         */
1791        nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
1792        status = nn->client_tracking_ops->init(net);
1793        if (!status)
1794                return status;
1795
1796        /*
1797         * Finally, See if the recoverydir exists and is a directory.
1798         * If it is, then use the legacy ops.
1799         */
1800        nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
1801        status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
1802        if (!status) {
1803                status = d_is_dir(path.dentry);
1804                path_put(&path);
1805                if (!status) {
1806                        status = -EINVAL;
1807                        goto out;
1808                }
1809        }
1810
1811do_init:
1812        status = nn->client_tracking_ops->init(net);
1813out:
1814        if (status) {
1815                printk(KERN_WARNING "NFSD: Unable to initialize client "
1816                                    "recovery tracking! (%d)\n", status);
1817                nn->client_tracking_ops = NULL;
1818        }
1819        return status;
1820}
1821
1822void
1823nfsd4_client_tracking_exit(struct net *net)
1824{
1825        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1826
1827        if (nn->client_tracking_ops) {
1828                if (nn->client_tracking_ops->exit)
1829                        nn->client_tracking_ops->exit(net);
1830                nn->client_tracking_ops = NULL;
1831        }
1832}
1833
1834void
1835nfsd4_client_record_create(struct nfs4_client *clp)
1836{
1837        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1838
1839        if (nn->client_tracking_ops)
1840                nn->client_tracking_ops->create(clp);
1841}
1842
1843void
1844nfsd4_client_record_remove(struct nfs4_client *clp)
1845{
1846        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1847
1848        if (nn->client_tracking_ops)
1849                nn->client_tracking_ops->remove(clp);
1850}
1851
1852int
1853nfsd4_client_record_check(struct nfs4_client *clp)
1854{
1855        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1856
1857        if (nn->client_tracking_ops)
1858                return nn->client_tracking_ops->check(clp);
1859
1860        return -EOPNOTSUPP;
1861}
1862
1863void
1864nfsd4_record_grace_done(struct nfsd_net *nn)
1865{
1866        if (nn->client_tracking_ops)
1867                nn->client_tracking_ops->grace_done(nn);
1868}
1869
1870static int
1871rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
1872{
1873        struct super_block *sb = ptr;
1874        struct net *net = sb->s_fs_info;
1875        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1876        struct cld_net *cn = nn->cld_net;
1877        struct dentry *dentry;
1878        int ret = 0;
1879
1880        if (!try_module_get(THIS_MODULE))
1881                return 0;
1882
1883        if (!cn) {
1884                module_put(THIS_MODULE);
1885                return 0;
1886        }
1887
1888        switch (event) {
1889        case RPC_PIPEFS_MOUNT:
1890                dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
1891                if (IS_ERR(dentry)) {
1892                        ret = PTR_ERR(dentry);
1893                        break;
1894                }
1895                cn->cn_pipe->dentry = dentry;
1896                break;
1897        case RPC_PIPEFS_UMOUNT:
1898                if (cn->cn_pipe->dentry)
1899                        nfsd4_cld_unregister_sb(cn->cn_pipe);
1900                break;
1901        default:
1902                ret = -ENOTSUPP;
1903                break;
1904        }
1905        module_put(THIS_MODULE);
1906        return ret;
1907}
1908
1909static struct notifier_block nfsd4_cld_block = {
1910        .notifier_call = rpc_pipefs_event,
1911};
1912
1913int
1914register_cld_notifier(void)
1915{
1916        return rpc_pipefs_notifier_register(&nfsd4_cld_block);
1917}
1918
1919void
1920unregister_cld_notifier(void)
1921{
1922        rpc_pipefs_notifier_unregister(&nfsd4_cld_block);
1923}
1924