// SPDX-License-Identifier: LGPL-2.1
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

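/*
 * Map the POSIX access mode bits (O_RDONLY/O_WRONLY/O_RDWR) in the open
 * flags to the SMB desired access bits requested at open time.
 */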
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

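/*
 * Map POSIX open flags to the SMB_O_* flags used by the legacy CIFS
 * POSIX extensions create/open call.
 */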
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

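/*
 * Derive the SMB create disposition from the O_CREAT/O_EXCL/O_TRUNC
 * combination (see the open flag mapping table in cifs_nt_open() below).
 */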
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

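/*
 * Open a file using the legacy CIFS POSIX extensions. On success the
 * granted oplock and netfid are returned through @poplock/@pnetfid and,
 * if the caller asked for it and the server sent valid metadata,
 * *pinode is created or revalidated from the returned attributes.
 */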
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

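/*
 * Open a file over the standard (non-POSIX) SMB path: convert the POSIX
 * open flags to NT desired access and a create disposition, issue the
 * open, then refresh the inode from the server's response.
 */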
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it as FILE_SUPERSEDE does (which
 *      uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

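/*
 * Return true if any byte-range locks are currently held on the inode,
 * checked under lock_sem across all open fids.
 */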
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

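/*
 * Take lock_sem for writing by polling with trylock instead of a plain
 * down_write(), so a queued writer never blocks readers that may
 * legitimately re-take lock_sem for reading (as reconnect paths can do).
 */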
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

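/*
 * Allocate and initialize the per-open-file state (cifsFileInfo), link
 * it into the tcon and inode open-file lists, and apply the oplock the
 * server granted (downgraded to None if mandatory brlocks are present).
 */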
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, defer the final release to the fileinfo_put
 *              workqueue (kept false on close and oplock break paths)
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

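/*
 * ->open() for regular files: reuse a cached deferred-close handle when
 * the open flags match, otherwise open via the POSIX extensions if the
 * server supports them, falling back to the standard NT open path.
 */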
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

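/*
 * Reopen a file whose handle was invalidated (e.g. by a reconnect). If
 * @can_flush is true, write back dirty pages and refresh the inode from
 * the server before reacquiring byte-range locks.
 */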
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here.
         * We can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data. Since we do not know whether
         * we have local data that would invalidate the current end of file
         * on the server, we can not go to the server for new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

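/*
 * Work handler for a deferred close: drop the deferred-close record and
 * release the reference that was kept while the close was postponed.
 */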
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

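/*
 * ->release() for regular files: if we hold a handle lease, defer the
 * actual close (for up to acregmax) so that a quickly following reopen
 * can reuse the handle; otherwise drop the reference immediately.
 */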
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    dclose) {
                        if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so increase the ref count to
                                 * avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

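/*
 * After a reconnect, walk all files open on the tcon and reopen any
 * invalidated persistent handles, marking the tcon for another pass if
 * a reopen fails.
 */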
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

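/*
 * Allocate and initialize a byte-range lock record for the given range,
 * owned by the current thread group.
 */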
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

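/*
 * Push all cached mandatory byte-range locks for this fid to the server,
 * batching as many LOCKING_ANDX ranges per request as maxBuf allows.
 */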
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

1425static int
1426cifs_push_locks(struct cifsFileInfo *cfile)
1427{
1428        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1429        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1430        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1431        int rc = 0;
1432
1433        /* we are going to update can_cache_brlcks here - need write access */
1434        cifs_down_write(&cinode->lock_sem);
1435        if (!cinode->can_cache_brlcks) {
1436                up_write(&cinode->lock_sem);
1437                return rc;
1438        }
1439
1440        if (cap_unix(tcon->ses) &&
1441            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1442            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1443                rc = cifs_push_posix_locks(cfile);
1444        else
1445                rc = tcon->ses->server->ops->push_mand_locks(cfile);
1446
1447        cinode->can_cache_brlcks = false;
1448        up_write(&cinode->lock_sem);
1449        return rc;
1450}
1451
1452static void
1453cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1454                bool *wait_flag, struct TCP_Server_Info *server)
1455{
1456        if (flock->fl_flags & FL_POSIX)
1457                cifs_dbg(FYI, "Posix\n");
1458        if (flock->fl_flags & FL_FLOCK)
1459                cifs_dbg(FYI, "Flock\n");
1460        if (flock->fl_flags & FL_SLEEP) {
1461                cifs_dbg(FYI, "Blocking lock\n");
1462                *wait_flag = true;
1463        }
1464        if (flock->fl_flags & FL_ACCESS)
1465                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1466        if (flock->fl_flags & FL_LEASE)
1467                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1468        if (flock->fl_flags &
1469            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1470               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1471                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1472
1473        *type = server->vals->large_lock_type;
1474        if (flock->fl_type == F_WRLCK) {
1475                cifs_dbg(FYI, "F_WRLCK\n");
1476                *type |= server->vals->exclusive_lock_type;
1477                *lock = 1;
1478        } else if (flock->fl_type == F_UNLCK) {
1479                cifs_dbg(FYI, "F_UNLCK\n");
1480                *type |= server->vals->unlock_lock_type;
1481                *unlock = 1;
1482                /* Check if unlock includes more than one lock range */
1483        } else if (flock->fl_type == F_RDLCK) {
1484                cifs_dbg(FYI, "F_RDLCK\n");
1485                *type |= server->vals->shared_lock_type;
1486                *lock = 1;
1487        } else if (flock->fl_type == F_EXLCK) {
1488                cifs_dbg(FYI, "F_EXLCK\n");
1489                *type |= server->vals->exclusive_lock_type;
1490                *lock = 1;
1491        } else if (flock->fl_type == F_SHLCK) {
1492                cifs_dbg(FYI, "F_SHLCK\n");
1493                *type |= server->vals->shared_lock_type;
1494                *lock = 1;
1495        } else
1496                cifs_dbg(FYI, "Unknown type of lock\n");
1497}
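/*
 * A minimal userspace sketch of what feeds this decoder (illustrative
 * only; fd is hypothetical):
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,	decoded above as exclusive_lock_type
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 4096,
 *	};
 *	fcntl(fd, F_SETLKW, &fl);	F_SETLKW blocks: FL_SLEEP -> wait_flag
 */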
1498
1499static int
1500cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1501           bool wait_flag, bool posix_lck, unsigned int xid)
1502{
1503        int rc = 0;
1504        __u64 length = 1 + flock->fl_end - flock->fl_start;
1505        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1506        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1507        struct TCP_Server_Info *server = tcon->ses->server;
1508        __u16 netfid = cfile->fid.netfid;
1509
1510        if (posix_lck) {
1511                int posix_lock_type;
1512
1513                rc = cifs_posix_lock_test(file, flock);
1514                if (!rc)
1515                        return rc;
1516
1517                if (type & server->vals->shared_lock_type)
1518                        posix_lock_type = CIFS_RDLCK;
1519                else
1520                        posix_lock_type = CIFS_WRLCK;
1521                rc = CIFSSMBPosixLock(xid, tcon, netfid,
1522                                      hash_lockowner(flock->fl_owner),
1523                                      flock->fl_start, length, flock,
1524                                      posix_lock_type, wait_flag);
1525                return rc;
1526        }
1527
1528        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1529        if (!rc)
1530                return rc;
1531
1532        /* BB we could chain these into one lock request BB */
1533        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1534                                    1, 0, false);
1535        if (rc == 0) {
1536                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1537                                            type, 0, 1, false);
1538                flock->fl_type = F_UNLCK;
1539                if (rc != 0)
1540                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1541                                 rc);
1542                return 0;
1543        }
1544
1545        if (type & server->vals->shared_lock_type) {
1546                flock->fl_type = F_WRLCK;
1547                return 0;
1548        }
1549
1550        type &= ~server->vals->exclusive_lock_type;
1551
1552        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1553                                    type | server->vals->shared_lock_type,
1554                                    1, 0, false);
1555        if (rc == 0) {
1556                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                        type | server->vals->shared_lock_type, 0, 1, false);
1558                flock->fl_type = F_RDLCK;
1559                if (rc != 0)
1560                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1561                                 rc);
1562        } else
1563                flock->fl_type = F_WRLCK;
1564
1565        return 0;
1566}
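/*
 * Resulting F_GETLK semantics, for illustration: since SMB mandatory
 * locking has no "test" operation, a probe lock is taken and immediately
 * released. If a shared (F_RDLCK) probe fails, a writer must hold the
 * range, so fl_type is reported as F_WRLCK. If an exclusive probe fails,
 * a shared probe is retried: success means the range is only read-locked
 * (report F_RDLCK), failure means F_WRLCK.
 */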
1567
1568void
1569cifs_move_llist(struct list_head *source, struct list_head *dest)
1570{
1571        struct list_head *li, *tmp;
1572        list_for_each_safe(li, tmp, source)
1573                list_move(li, dest);
1574}
1575
1576void
1577cifs_free_llist(struct list_head *llist)
1578{
1579        struct cifsLockInfo *li, *tmp;
1580        list_for_each_entry_safe(li, tmp, llist, llist) {
1581                cifs_del_lock_waiters(li);
1582                list_del(&li->llist);
1583                kfree(li);
1584        }
1585}
1586
1587int
1588cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1589                  unsigned int xid)
1590{
1591        int rc = 0, stored_rc;
1592        static const int types[] = {
1593                LOCKING_ANDX_LARGE_FILES,
1594                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1595        };
1596        unsigned int i;
1597        unsigned int max_num, num, max_buf;
1598        LOCKING_ANDX_RANGE *buf, *cur;
1599        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1600        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1601        struct cifsLockInfo *li, *tmp;
1602        __u64 length = 1 + flock->fl_end - flock->fl_start;
1603        struct list_head tmp_llist;
1604
1605        INIT_LIST_HEAD(&tmp_llist);
1606
1607        /*
1608         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1609         * and check it before using.
1610         */
1611        max_buf = tcon->ses->server->maxBuf;
1612        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1613                return -EINVAL;
1614
1615        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1616                     PAGE_SIZE);
1617        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1618                        PAGE_SIZE);
1619        max_num = (max_buf - sizeof(struct smb_hdr)) /
1620                                                sizeof(LOCKING_ANDX_RANGE);
1621        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1622        if (!buf)
1623                return -ENOMEM;
1624
1625        cifs_down_write(&cinode->lock_sem);
1626        for (i = 0; i < 2; i++) {
1627                cur = buf;
1628                num = 0;
1629                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1630                        if (flock->fl_start > li->offset ||
1631                            (flock->fl_start + length) <
1632                            (li->offset + li->length))
1633                                continue;
1634                        if (current->tgid != li->pid)
1635                                continue;
1636                        if (types[i] != li->type)
1637                                continue;
1638                        if (cinode->can_cache_brlcks) {
1639                                /*
1640                                 * We can cache brlock requests - simply remove
1641                                 * a lock from the file's list.
1642                                 */
1643                                list_del(&li->llist);
1644                                cifs_del_lock_waiters(li);
1645                                kfree(li);
1646                                continue;
1647                        }
1648                        cur->Pid = cpu_to_le16(li->pid);
1649                        cur->LengthLow = cpu_to_le32((u32)li->length);
1650                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1651                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1652                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1653                        /*
1654                         * We need to save a lock here to let us add it again to
1655                         * the file's list if the unlock range request fails on
1656                         * the server.
1657                         */
1658                        list_move(&li->llist, &tmp_llist);
1659                        if (++num == max_num) {
1660                                stored_rc = cifs_lockv(xid, tcon,
1661                                                       cfile->fid.netfid,
1662                                                       li->type, num, 0, buf);
1663                                if (stored_rc) {
1664                                        /*
1665                                         * We failed on the unlock range
1666                                         * request - add all locks from the tmp
1667                                         * list to the head of the file's list.
1668                                         */
1669                                        cifs_move_llist(&tmp_llist,
1670                                                        &cfile->llist->locks);
1671                                        rc = stored_rc;
1672                                } else
1673                                        /*
1674                         * The unlock range request succeeded -
1675                                         * free the tmp list.
1676                                         */
1677                                        cifs_free_llist(&tmp_llist);
1678                                cur = buf;
1679                                num = 0;
1680                        } else
1681                                cur++;
1682                }
1683                if (num) {
1684                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1685                                               types[i], num, 0, buf);
1686                        if (stored_rc) {
1687                                cifs_move_llist(&tmp_llist,
1688                                                &cfile->llist->locks);
1689                                rc = stored_rc;
1690                        } else
1691                                cifs_free_llist(&tmp_llist);
1692                }
1693        }
1694
1695        up_write(&cinode->lock_sem);
1696        kfree(buf);
1697        return rc;
1698}
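/*
 * Worked example of the coalescing math above (sizes illustrative): if
 * maxBuf were 16644 and sizeof(struct smb_hdr) were 32, max_buf would be
 * min(16644 - 32, PAGE_SIZE) = 4096 with 4K pages, so up to
 * (4096 - 32) / sizeof(LOCKING_ANDX_RANGE) unlock ranges are batched into
 * one LOCKING_ANDX request per cifs_lockv() call.
 */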
1699
1700static int
1701cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1702           bool wait_flag, bool posix_lck, int lock, int unlock,
1703           unsigned int xid)
1704{
1705        int rc = 0;
1706        __u64 length = 1 + flock->fl_end - flock->fl_start;
1707        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1708        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1709        struct TCP_Server_Info *server = tcon->ses->server;
1710        struct inode *inode = d_inode(cfile->dentry);
1711
1712        if (posix_lck) {
1713                int posix_lock_type;
1714
1715                rc = cifs_posix_lock_set(file, flock);
1716                if (rc <= FILE_LOCK_DEFERRED)
1717                        return rc;
1718
1719                if (type & server->vals->shared_lock_type)
1720                        posix_lock_type = CIFS_RDLCK;
1721                else
1722                        posix_lock_type = CIFS_WRLCK;
1723
1724                if (unlock == 1)
1725                        posix_lock_type = CIFS_UNLCK;
1726
1727                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1728                                      hash_lockowner(flock->fl_owner),
1729                                      flock->fl_start, length,
1730                                      NULL, posix_lock_type, wait_flag);
1731                goto out;
1732        }
1733
1734        if (lock) {
1735                struct cifsLockInfo *lock;
1736
1737                lock = cifs_lock_init(flock->fl_start, length, type,
1738                                      flock->fl_flags);
1739                if (!lock)
1740                        return -ENOMEM;
1741
1742                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1743                if (rc < 0) {
1744                        kfree(lock);
1745                        return rc;
1746                }
1747                if (!rc)
1748                        goto out;
1749
1750                /*
1751                 * Windows 7 server can delay breaking lease from read to None
1752                 * if we set a byte-range lock on a file - break it explicitly
1753                 * before sending the lock to the server to be sure the next
1754         * read won't conflict with non-overlapping locks due to
1755         * page reading.
1756                 */
1757                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1758                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1759                        cifs_zap_mapping(inode);
1760                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1761                                 inode);
1762                        CIFS_I(inode)->oplock = 0;
1763                }
1764
1765                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766                                            type, 1, 0, wait_flag);
1767                if (rc) {
1768                        kfree(lock);
1769                        return rc;
1770                }
1771
1772                cifs_lock_add(cfile, lock);
1773        } else if (unlock)
1774                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1775
1776out:
1777        if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1778                /*
1779                 * If this is a request to remove all locks because we
1780                 * are closing the file, it doesn't matter if the
1781                 * unlocking failed as both cifs.ko and the SMB server
1782                 * remove the lock on file close
1783                 */
1784                if (rc) {
1785                        cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1786                        if (!(flock->fl_flags & FL_CLOSE))
1787                                return rc;
1788                }
1789                rc = locks_lock_file_wait(file, flock);
1790        }
1791        return rc;
1792}
1793
1794int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1795{
1796        int rc, xid;
1797        int lock = 0, unlock = 0;
1798        bool wait_flag = false;
1799        bool posix_lck = false;
1800        struct cifs_sb_info *cifs_sb;
1801        struct cifs_tcon *tcon;
1802        struct cifsFileInfo *cfile;
1803        __u32 type;
1804
1805        rc = -EACCES;
1806        /* check before get_xid() so a failed flock request cannot leak an xid */
1807        if (!(fl->fl_flags & FL_FLOCK))
1808                return -ENOLCK;
1809        xid = get_xid();
1810
1811        cfile = (struct cifsFileInfo *)file->private_data;
1812        tcon = tlink_tcon(cfile->tlink);
1813
1814        cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1815                        tcon->ses->server);
1816        cifs_sb = CIFS_FILE_SB(file);
1817
1818        if (cap_unix(tcon->ses) &&
1819            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1820            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1821                posix_lck = true;
1822
1823        if (!lock && !unlock) {
1824                /*
1825                 * if neither lock nor unlock is set, there is nothing to
1826                 * do since we do not know what operation was requested
1827                 */
1828                free_xid(xid);
1829                return -EOPNOTSUPP;
1830        }
1831
1832        rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1833                        xid);
1834        free_xid(xid);
1835        return rc;
1838}
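/*
 * Illustrative userspace trigger for this path (fd is hypothetical):
 *
 *	flock(fd, LOCK_EX);	arrives as FL_FLOCK with F_WRLCK -> lock
 *	flock(fd, LOCK_UN);	arrives as F_UNLCK -> unlock
 *
 * LOCK_SH maps to F_RDLCK, and adding LOCK_NB clears FL_SLEEP so
 * wait_flag stays false.
 */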
1839
1840int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1841{
1842        int rc, xid;
1843        int lock = 0, unlock = 0;
1844        bool wait_flag = false;
1845        bool posix_lck = false;
1846        struct cifs_sb_info *cifs_sb;
1847        struct cifs_tcon *tcon;
1848        struct cifsFileInfo *cfile;
1849        __u32 type;
1850
1851        rc = -EACCES;
1852        xid = get_xid();
1853
1854        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1855                 cmd, flock->fl_flags, flock->fl_type,
1856                 flock->fl_start, flock->fl_end);
1857
1858        cfile = (struct cifsFileInfo *)file->private_data;
1859        tcon = tlink_tcon(cfile->tlink);
1860
1861        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1862                        tcon->ses->server);
1863        cifs_sb = CIFS_FILE_SB(file);
1864
1865        if (cap_unix(tcon->ses) &&
1866            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1867            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1868                posix_lck = true;
1869        /*
1870         * BB add code here to normalize offset and length to account for
1871         * negative length which we cannot accept over the wire.
1872         */
1873        if (IS_GETLK(cmd)) {
1874                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1875                free_xid(xid);
1876                return rc;
1877        }
1878
1879        if (!lock && !unlock) {
1880                /*
1881                 * if neither lock nor unlock is set, there is nothing to
1882                 * do since we do not know what operation was requested
1883                 */
1884                free_xid(xid);
1885                return -EOPNOTSUPP;
1886        }
1887
1888        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1889                        xid);
1890        free_xid(xid);
1891        return rc;
1892}
1893
1894/*
1895 * update the file size (if needed) after a write. Should be called with
1896 * the inode->i_lock held
1897 */
1898void
1899cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1900                      unsigned int bytes_written)
1901{
1902        loff_t end_of_write = offset + bytes_written;
1903
1904        if (end_of_write > cifsi->server_eof)
1905                cifsi->server_eof = end_of_write;
1906}
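/*
 * Canonical call pattern, as used by the writers later in this file:
 *
 *	spin_lock(&d_inode(dentry)->i_lock);
 *	cifs_update_eof(cifsi, offset, bytes_written);
 *	spin_unlock(&d_inode(dentry)->i_lock);
 */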
1907
1908static ssize_t
1909cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1910           size_t write_size, loff_t *offset)
1911{
1912        int rc = 0;
1913        unsigned int bytes_written = 0;
1914        unsigned int total_written;
1915        struct cifs_tcon *tcon;
1916        struct TCP_Server_Info *server;
1917        unsigned int xid;
1918        struct dentry *dentry = open_file->dentry;
1919        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1920        struct cifs_io_parms io_parms = {0};
1921
1922        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1923                 write_size, *offset, dentry);
1924
1925        tcon = tlink_tcon(open_file->tlink);
1926        server = tcon->ses->server;
1927
1928        if (!server->ops->sync_write)
1929                return -ENOSYS;
1930
1931        xid = get_xid();
1932
1933        for (total_written = 0; write_size > total_written;
1934             total_written += bytes_written) {
1935                rc = -EAGAIN;
1936                while (rc == -EAGAIN) {
1937                        struct kvec iov[2];
1938                        unsigned int len;
1939
1940                        if (open_file->invalidHandle) {
1941                                /* we could deadlock if we called
1942                                   filemap_fdatawait from here so tell
1943                                   reopen_file not to flush data to
1944                                   server now */
1945                                rc = cifs_reopen_file(open_file, false);
1946                                if (rc != 0)
1947                                        break;
1948                        }
1949
1950                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
1951                                  (unsigned int)write_size - total_written);
1952                        /* iov[0] is reserved for smb header */
1953                        iov[1].iov_base = (char *)write_data + total_written;
1954                        iov[1].iov_len = len;
1955                        io_parms.pid = pid;
1956                        io_parms.tcon = tcon;
1957                        io_parms.offset = *offset;
1958                        io_parms.length = len;
1959                        rc = server->ops->sync_write(xid, &open_file->fid,
1960                                        &io_parms, &bytes_written, iov, 1);
1961                }
1962                if (rc || (bytes_written == 0)) {
1963                        if (total_written)
1964                                break;
1965                        else {
1966                                free_xid(xid);
1967                                return rc;
1968                        }
1969                } else {
1970                        spin_lock(&d_inode(dentry)->i_lock);
1971                        cifs_update_eof(cifsi, *offset, bytes_written);
1972                        spin_unlock(&d_inode(dentry)->i_lock);
1973                        *offset += bytes_written;
1974                }
1975        }
1976
1977        cifs_stats_bytes_written(tcon, total_written);
1978
1979        if (total_written > 0) {
1980                spin_lock(&d_inode(dentry)->i_lock);
1981                if (*offset > d_inode(dentry)->i_size) {
1982                        i_size_write(d_inode(dentry), *offset);
1983                        d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1984                }
1985                spin_unlock(&d_inode(dentry)->i_lock);
1986        }
1987        mark_inode_dirty_sync(d_inode(dentry));
1988        free_xid(xid);
1989        return total_written;
1990}
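/*
 * Note on the i_blocks update above: (512 - 1 + offset) >> 9 rounds the
 * new size up to 512-byte sectors; e.g. an offset of 1000 yields
 * 1511 >> 9 = 2 blocks.
 */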
1991
1992struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1993                                        bool fsuid_only)
1994{
1995        struct cifsFileInfo *open_file = NULL;
1996        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1997
1998        /* only filter by fsuid on multiuser mounts */
1999        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2000                fsuid_only = false;
2001
2002        spin_lock(&cifs_inode->open_file_lock);
2003        /* we could simply get the first list entry since write-only entries
2004           are always at the end of the list, but since the first entry might
2005           have a close pending, we go through the whole list */
2006        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2007                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2008                        continue;
2009                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2010                        if (!open_file->invalidHandle) {
2011                                /* found a good file */
2012                                /* lock it so it will not be closed on us */
2013                                cifsFileInfo_get(open_file);
2014                                spin_unlock(&cifs_inode->open_file_lock);
2015                                return open_file;
2016                        } /* else might as well continue, and look for
2017                             another, or simply have the caller reopen it
2018                             again rather than trying to fix this handle */
2019                } else /* write only file */
2020                        break; /* write only files are last so must be done */
2021        }
2022        spin_unlock(&cifs_inode->open_file_lock);
2023        return NULL;
2024}
2025
2026/* Return -EBADF if no handle is found and general rc otherwise */
2027int
2028cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2029                       struct cifsFileInfo **ret_file)
2030{
2031        struct cifsFileInfo *open_file, *inv_file = NULL;
2032        struct cifs_sb_info *cifs_sb;
2033        bool any_available = false;
2034        int rc = -EBADF;
2035        unsigned int refind = 0;
2036        bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2037        bool with_delete = flags & FIND_WR_WITH_DELETE;
2038        *ret_file = NULL;
2039
2040        /*
2041         * Having a null inode here (because mapping->host was set to zero by
2042         * the VFS or MM) should not happen, but we had reports of an oops (due
2043         * to it being zero) during stress testcases so we need to check for it
2044         */
2045
2046        if (cifs_inode == NULL) {
2047                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2048                dump_stack();
2049                return rc;
2050        }
2051
2052        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2053
2054        /* only filter by fsuid on multiuser mounts */
2055        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2056                fsuid_only = false;
2057
2058        spin_lock(&cifs_inode->open_file_lock);
2059refind_writable:
2060        if (refind > MAX_REOPEN_ATT) {
2061                spin_unlock(&cifs_inode->open_file_lock);
2062                return rc;
2063        }
2064        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2065                if (!any_available && open_file->pid != current->tgid)
2066                        continue;
2067                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2068                        continue;
2069                if (with_delete && !(open_file->fid.access & DELETE))
2070                        continue;
2071                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2072                        if (!open_file->invalidHandle) {
2073                                /* found a good writable file */
2074                                cifsFileInfo_get(open_file);
2075                                spin_unlock(&cifs_inode->open_file_lock);
2076                                *ret_file = open_file;
2077                                return 0;
2078                        } else {
2079                                if (!inv_file)
2080                                        inv_file = open_file;
2081                        }
2082                }
2083        }
2084        /* couldn't find a usable FH with the same pid, try any available */
2085        if (!any_available) {
2086                any_available = true;
2087                goto refind_writable;
2088        }
2089
2090        if (inv_file) {
2091                any_available = false;
2092                cifsFileInfo_get(inv_file);
2093        }
2094
2095        spin_unlock(&cifs_inode->open_file_lock);
2096
2097        if (inv_file) {
2098                rc = cifs_reopen_file(inv_file, false);
2099                if (!rc) {
2100                        *ret_file = inv_file;
2101                        return 0;
2102                }
2103
2104                spin_lock(&cifs_inode->open_file_lock);
2105                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2106                spin_unlock(&cifs_inode->open_file_lock);
2107                cifsFileInfo_put(inv_file);
2108                ++refind;
2109                inv_file = NULL;
2110                spin_lock(&cifs_inode->open_file_lock);
2111                goto refind_writable;
2112        }
2113
2114        return rc;
2115}
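/*
 * Search order above, summarized: (1) valid handles opened by the calling
 * tgid, (2) any valid handle once any_available is set, (3) an invalid
 * handle revived via cifs_reopen_file(), retried up to MAX_REOPEN_ATT
 * times before giving up with -EBADF.
 */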
2116
2117struct cifsFileInfo *
2118find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2119{
2120        struct cifsFileInfo *cfile;
2121        int rc;
2122
2123        rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2124        if (rc)
2125                cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2126
2127        return cfile;
2128}
2129
2130int
2131cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2132                       int flags,
2133                       struct cifsFileInfo **ret_file)
2134{
2135        struct cifsFileInfo *cfile;
2136        void *page = alloc_dentry_path();
2137
2138        *ret_file = NULL;
2139
2140        spin_lock(&tcon->open_file_lock);
2141        list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2142                struct cifsInodeInfo *cinode;
2143                const char *full_path = build_path_from_dentry(cfile->dentry, page);
2144                if (IS_ERR(full_path)) {
2145                        spin_unlock(&tcon->open_file_lock);
2146                        free_dentry_path(page);
2147                        return PTR_ERR(full_path);
2148                }
2149                if (strcmp(full_path, name))
2150                        continue;
2151
2152                cinode = CIFS_I(d_inode(cfile->dentry));
2153                spin_unlock(&tcon->open_file_lock);
2154                free_dentry_path(page);
2155                return cifs_get_writable_file(cinode, flags, ret_file);
2156        }
2157
2158        spin_unlock(&tcon->open_file_lock);
2159        free_dentry_path(page);
2160        return -ENOENT;
2161}
2162
2163int
2164cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2165                       struct cifsFileInfo **ret_file)
2166{
2167        struct cifsFileInfo *cfile;
2168        void *page = alloc_dentry_path();
2169
2170        *ret_file = NULL;
2171
2172        spin_lock(&tcon->open_file_lock);
2173        list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2174                struct cifsInodeInfo *cinode;
2175                const char *full_path = build_path_from_dentry(cfile->dentry, page);
2176                if (IS_ERR(full_path)) {
2177                        spin_unlock(&tcon->open_file_lock);
2178                        free_dentry_path(page);
2179                        return PTR_ERR(full_path);
2180                }
2181                if (strcmp(full_path, name))
2182                        continue;
2183
2184                cinode = CIFS_I(d_inode(cfile->dentry));
2185                spin_unlock(&tcon->open_file_lock);
2186                free_dentry_path(page);
2187                *ret_file = find_readable_file(cinode, 0);
2188                return *ret_file ? 0 : -ENOENT;
2189        }
2190
2191        spin_unlock(&tcon->open_file_lock);
2192        free_dentry_path(page);
2193        return -ENOENT;
2194}
2195
2196static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2197{
2198        struct address_space *mapping = page->mapping;
2199        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2200        char *write_data;
2201        int rc = -EFAULT;
2202        int bytes_written = 0;
2203        struct inode *inode;
2204        struct cifsFileInfo *open_file;
2205
2206        if (!mapping || !mapping->host)
2207                return -EFAULT;
2208
2209        inode = page->mapping->host;
2210
2211        offset += (loff_t)from;
2212        write_data = kmap(page);
2213        write_data += from;
2214
2215        if ((to > PAGE_SIZE) || (from > to)) {
2216                kunmap(page);
2217                return -EIO;
2218        }
2219
2220        /* racing with truncate? */
2221        if (offset > mapping->host->i_size) {
2222                kunmap(page);
2223                return 0; /* don't care */
2224        }
2225
2226        /* check to make sure that we are not extending the file */
2227        if (mapping->host->i_size - offset < (loff_t)to)
2228                to = (unsigned)(mapping->host->i_size - offset);
2229
2230        rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2231                                    &open_file);
2232        if (!rc) {
2233                bytes_written = cifs_write(open_file, open_file->pid,
2234                                           write_data, to - from, &offset);
2235                cifsFileInfo_put(open_file);
2236                /* Does mm or vfs already set times? */
2237                inode->i_atime = inode->i_mtime = current_time(inode);
2238                if ((bytes_written > 0) && (offset))
2239                        rc = 0;
2240                else if (bytes_written < 0)
2241                        rc = bytes_written;
2242                else
2243                        rc = -EFAULT;
2244        } else {
2245                cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2246                if (!is_retryable_error(rc))
2247                        rc = -EIO;
2248        }
2249
2250        kunmap(page);
2251        return rc;
2252}
2253
2254static struct cifs_writedata *
2255wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2256                          pgoff_t end, pgoff_t *index,
2257                          unsigned int *found_pages)
2258{
2259        struct cifs_writedata *wdata;
2260
2261        wdata = cifs_writedata_alloc((unsigned int)tofind,
2262                                     cifs_writev_complete);
2263        if (!wdata)
2264                return NULL;
2265
2266        *found_pages = find_get_pages_range_tag(mapping, index, end,
2267                                PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2268        return wdata;
2269}
2270
2271static unsigned int
2272wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2273                    struct address_space *mapping,
2274                    struct writeback_control *wbc,
2275                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2276{
2277        unsigned int nr_pages = 0, i;
2278        struct page *page;
2279
2280        for (i = 0; i < found_pages; i++) {
2281                page = wdata->pages[i];
2282                /*
2283                 * At this point we hold neither the i_pages lock nor the
2284                 * page lock: the page may be truncated or invalidated
2285                 * (changing page->mapping to NULL), or even swizzled
2286                 * back from swapper_space to tmpfs file mapping
2287                 */
2288
2289                if (nr_pages == 0)
2290                        lock_page(page);
2291                else if (!trylock_page(page))
2292                        break;
2293
2294                if (unlikely(page->mapping != mapping)) {
2295                        unlock_page(page);
2296                        break;
2297                }
2298
2299                if (!wbc->range_cyclic && page->index > end) {
2300                        *done = true;
2301                        unlock_page(page);
2302                        break;
2303                }
2304
2305                if (*next && (page->index != *next)) {
2306                        /* Not next consecutive page */
2307                        unlock_page(page);
2308                        break;
2309                }
2310
2311                if (wbc->sync_mode != WB_SYNC_NONE)
2312                        wait_on_page_writeback(page);
2313
2314                if (PageWriteback(page) ||
2315                                !clear_page_dirty_for_io(page)) {
2316                        unlock_page(page);
2317                        break;
2318                }
2319
2320                /*
2321                 * This actually clears the dirty bit in the radix tree.
2322                 * See cifs_writepage() for more commentary.
2323                 */
2324                set_page_writeback(page);
2325                if (page_offset(page) >= i_size_read(mapping->host)) {
2326                        *done = true;
2327                        unlock_page(page);
2328                        end_page_writeback(page);
2329                        break;
2330                }
2331
2332                wdata->pages[i] = page;
2333                *next = page->index + 1;
2334                ++nr_pages;
2335        }
2336
2337        /* reset index to refind any pages skipped */
2338        if (nr_pages == 0)
2339                *index = wdata->pages[0]->index + 1;
2340
2341        /* put any pages we aren't going to use */
2342        for (i = nr_pages; i < found_pages; i++) {
2343                put_page(wdata->pages[i]);
2344                wdata->pages[i] = NULL;
2345        }
2346
2347        return nr_pages;
2348}
2349
2350static int
2351wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2352                 struct address_space *mapping, struct writeback_control *wbc)
2353{
2354        int rc;
2355
2356        wdata->sync_mode = wbc->sync_mode;
2357        wdata->nr_pages = nr_pages;
2358        wdata->offset = page_offset(wdata->pages[0]);
2359        wdata->pagesz = PAGE_SIZE;
2360        wdata->tailsz = min(i_size_read(mapping->host) -
2361                        page_offset(wdata->pages[nr_pages - 1]),
2362                        (loff_t)PAGE_SIZE);
2363        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2364        wdata->pid = wdata->cfile->pid;
2365
2366        rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2367        if (rc)
2368                return rc;
2369
2370        if (wdata->cfile->invalidHandle)
2371                rc = -EAGAIN;
2372        else
2373                rc = wdata->server->ops->async_writev(wdata,
2374                                                      cifs_writedata_release);
2375
2376        return rc;
2377}
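/*
 * Example of the sizing above (4K pages, illustrative numbers): sending
 * three pages where i_size ends 100 bytes into the last page gives
 * tailsz = 100 and bytes = (3 - 1) * 4096 + 100 = 8292.
 */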
2378
2379static int cifs_writepages(struct address_space *mapping,
2380                           struct writeback_control *wbc)
2381{
2382        struct inode *inode = mapping->host;
2383        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2384        struct TCP_Server_Info *server;
2385        bool done = false, scanned = false, range_whole = false;
2386        pgoff_t end, index;
2387        struct cifs_writedata *wdata;
2388        struct cifsFileInfo *cfile = NULL;
2389        int rc = 0;
2390        int saved_rc = 0;
2391        unsigned int xid;
2392
2393        /*
2394         * If wsize is smaller than the page cache size, default to writing
2395         * one page at a time via cifs_writepage
2396         */
2397        if (cifs_sb->ctx->wsize < PAGE_SIZE)
2398                return generic_writepages(mapping, wbc);
2399
2400        xid = get_xid();
2401        if (wbc->range_cyclic) {
2402                index = mapping->writeback_index; /* Start from prev offset */
2403                end = -1;
2404        } else {
2405                index = wbc->range_start >> PAGE_SHIFT;
2406                end = wbc->range_end >> PAGE_SHIFT;
2407                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2408                        range_whole = true;
2409                scanned = true;
2410        }
2411        server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2412
2413retry:
2414        while (!done && index <= end) {
2415                unsigned int i, nr_pages, found_pages, wsize;
2416                pgoff_t next = 0, tofind, saved_index = index;
2417                struct cifs_credits credits_on_stack;
2418                struct cifs_credits *credits = &credits_on_stack;
2419                int get_file_rc = 0;
2420
2421                if (cfile)
2422                        cifsFileInfo_put(cfile);
2423
2424                rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2425
2426                /* in case of an error store it to return later */
2427                if (rc)
2428                        get_file_rc = rc;
2429
2430                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2431                                                   &wsize, credits);
2432                if (rc != 0) {
2433                        done = true;
2434                        break;
2435                }
2436
2437                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2438
2439                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2440                                                  &found_pages);
2441                if (!wdata) {
2442                        rc = -ENOMEM;
2443                        done = true;
2444                        add_credits_and_wake_if(server, credits, 0);
2445                        break;
2446                }
2447
2448                if (found_pages == 0) {
2449                        kref_put(&wdata->refcount, cifs_writedata_release);
2450                        add_credits_and_wake_if(server, credits, 0);
2451                        break;
2452                }
2453
2454                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2455                                               end, &index, &next, &done);
2456
2457                /* nothing to write? */
2458                if (nr_pages == 0) {
2459                        kref_put(&wdata->refcount, cifs_writedata_release);
2460                        add_credits_and_wake_if(server, credits, 0);
2461                        continue;
2462                }
2463
2464                wdata->credits = credits_on_stack;
2465                wdata->cfile = cfile;
2466                wdata->server = server;
2467                cfile = NULL;
2468
2469                if (!wdata->cfile) {
2470                        cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2471                                 get_file_rc);
2472                        if (is_retryable_error(get_file_rc))
2473                                rc = get_file_rc;
2474                        else
2475                                rc = -EBADF;
2476                } else
2477                        rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2478
2479                for (i = 0; i < nr_pages; ++i)
2480                        unlock_page(wdata->pages[i]);
2481
2482                /* send failure -- clean up the mess */
2483                if (rc != 0) {
2484                        add_credits_and_wake_if(server, &wdata->credits, 0);
2485                        for (i = 0; i < nr_pages; ++i) {
2486                                if (is_retryable_error(rc))
2487                                        redirty_page_for_writepage(wbc,
2488                                                           wdata->pages[i]);
2489                                else
2490                                        SetPageError(wdata->pages[i]);
2491                                end_page_writeback(wdata->pages[i]);
2492                                put_page(wdata->pages[i]);
2493                        }
2494                        if (!is_retryable_error(rc))
2495                                mapping_set_error(mapping, rc);
2496                }
2497                kref_put(&wdata->refcount, cifs_writedata_release);
2498
2499                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2500                        index = saved_index;
2501                        continue;
2502                }
2503
2504                /* Return immediately if we received a signal during writing */
2505                if (is_interrupt_error(rc)) {
2506                        done = true;
2507                        break;
2508                }
2509
2510                if (rc != 0 && saved_rc == 0)
2511                        saved_rc = rc;
2512
2513                wbc->nr_to_write -= nr_pages;
2514                if (wbc->nr_to_write <= 0)
2515                        done = true;
2516
2517                index = next;
2518        }
2519
2520        if (!scanned && !done) {
2521                /*
2522                 * We hit the last page and there is more work to be done: wrap
2523                 * back to the start of the file
2524                 */
2525                scanned = true;
2526                index = 0;
2527                goto retry;
2528        }
2529
2530        if (saved_rc != 0)
2531                rc = saved_rc;
2532
2533        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2534                mapping->writeback_index = index;
2535
2536        if (cfile)
2537                cifsFileInfo_put(cfile);
2538        free_xid(xid);
2539        /* Indication to update ctime and mtime as close is deferred */
2540        set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2541        return rc;
2542}
2543
2544static int
2545cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2546{
2547        int rc;
2548        unsigned int xid;
2549
2550        xid = get_xid();
2551/* BB add check for wbc flags */
2552        get_page(page);
2553        if (!PageUptodate(page))
2554                cifs_dbg(FYI, "ppw - page not up to date\n");
2555
2556        /*
2557         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2558         *
2559         * A writepage() implementation always needs to do either this,
2560         * or re-dirty the page with "redirty_page_for_writepage()" in
2561         * the case of a failure.
2562         *
2563         * Just unlocking the page will cause the radix tree tag-bits
2564         * to fail to update with the state of the page correctly.
2565         */
2566        set_page_writeback(page);
2567retry_write:
2568        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2569        if (is_retryable_error(rc)) {
2570                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2571                        goto retry_write;
2572                redirty_page_for_writepage(wbc, page);
2573        } else if (rc != 0) {
2574                SetPageError(page);
2575                mapping_set_error(page->mapping, rc);
2576        } else {
2577                SetPageUptodate(page);
2578        }
2579        end_page_writeback(page);
2580        put_page(page);
2581        free_xid(xid);
2582        return rc;
2583}
2584
2585static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2586{
2587        int rc = cifs_writepage_locked(page, wbc);
2588        unlock_page(page);
2589        return rc;
2590}
2591
2592static int cifs_write_end(struct file *file, struct address_space *mapping,
2593                        loff_t pos, unsigned len, unsigned copied,
2594                        struct page *page, void *fsdata)
2595{
2596        int rc;
2597        struct inode *inode = mapping->host;
2598        struct cifsFileInfo *cfile = file->private_data;
2599        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2600        __u32 pid;
2601
2602        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2603                pid = cfile->pid;
2604        else
2605                pid = current->tgid;
2606
2607        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2608                 page, pos, copied);
2609
2610        if (PageChecked(page)) {
2611                if (copied == len)
2612                        SetPageUptodate(page);
2613                ClearPageChecked(page);
2614        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2615                SetPageUptodate(page);
2616
2617        if (!PageUptodate(page)) {
2618                char *page_data;
2619                unsigned offset = pos & (PAGE_SIZE - 1);
2620                unsigned int xid;
2621
2622                xid = get_xid();
2623                /* this is probably better than directly calling
2624                   cifs_partialpagewrite() since here the file handle is
2625                   known, which we might as well leverage */
2626                /* BB check if anything else missing out of ppw
2627                   such as updating last write time */
2628                page_data = kmap(page);
2629                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2630                /* if (rc < 0) should we set writebehind rc? */
2631                kunmap(page);
2632
2633                free_xid(xid);
2634        } else {
2635                rc = copied;
2636                pos += copied;
2637                set_page_dirty(page);
2638        }
2639
2640        if (rc > 0) {
2641                spin_lock(&inode->i_lock);
2642                if (pos > inode->i_size) {
2643                        i_size_write(inode, pos);
2644                        inode->i_blocks = (512 - 1 + pos) >> 9;
2645                }
2646                spin_unlock(&inode->i_lock);
2647        }
2648
2649        unlock_page(page);
2650        put_page(page);
2651        /* Indication to update ctime and mtime as close is deferred */
2652        set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2653
2654        return rc;
2655}
2656
2657int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2658                      int datasync)
2659{
2660        unsigned int xid;
2661        int rc = 0;
2662        struct cifs_tcon *tcon;
2663        struct TCP_Server_Info *server;
2664        struct cifsFileInfo *smbfile = file->private_data;
2665        struct inode *inode = file_inode(file);
2666        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2667
2668        rc = file_write_and_wait_range(file, start, end);
2669        if (rc) {
2670                trace_cifs_fsync_err(inode->i_ino, rc);
2671                return rc;
2672        }
2673
2674        xid = get_xid();
2675
2676        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2677                 file, datasync);
2678
2679        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2680                rc = cifs_zap_mapping(inode);
2681                if (rc) {
2682                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2683                        rc = 0; /* don't care about it in fsync */
2684                }
2685        }
2686
2687        tcon = tlink_tcon(smbfile->tlink);
2688        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2689                server = tcon->ses->server;
2690                if (server->ops->flush)
2691                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2692                else
2693                        rc = -ENOSYS;
2694        }
2695
2696        free_xid(xid);
2697        return rc;
2698}
2699
2700int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2701{
2702        unsigned int xid;
2703        int rc = 0;
2704        struct cifs_tcon *tcon;
2705        struct TCP_Server_Info *server;
2706        struct cifsFileInfo *smbfile = file->private_data;
2707        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2708
2709        rc = file_write_and_wait_range(file, start, end);
2710        if (rc) {
2711                trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2712                return rc;
2713        }
2714
2715        xid = get_xid();
2716
2717        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2718                 file, datasync);
2719
2720        tcon = tlink_tcon(smbfile->tlink);
2721        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2722                server = tcon->ses->server;
2723                if (server->ops->flush)
2724                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2725                else
2726                        rc = -ENOSYS;
2727        }
2728
2729        free_xid(xid);
2730        return rc;
2731}
2732
2733/*
2734 * As file closes, flush all cached write data for this inode checking
2735 * for write behind errors.
2736 */
2737int cifs_flush(struct file *file, fl_owner_t id)
2738{
2739        struct inode *inode = file_inode(file);
2740        int rc = 0;
2741
2742        if (file->f_mode & FMODE_WRITE)
2743                rc = filemap_write_and_wait(inode->i_mapping);
2744
2745        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2746        if (rc)
2747                trace_cifs_flush_err(inode->i_ino, rc);
2748        return rc;
2749}
2750
2751static int
2752cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2753{
2754        int rc = 0;
2755        unsigned long i;
2756
2757        for (i = 0; i < num_pages; i++) {
2758                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2759                if (!pages[i]) {
2760                        /*
2761                         * save number of pages we have already allocated and
2762                         * return with ENOMEM error
2763                         */
2764                        num_pages = i;
2765                        rc = -ENOMEM;
2766                        break;
2767                }
2768        }
2769
2770        if (rc) {
2771                for (i = 0; i < num_pages; i++)
2772                        put_page(pages[i]);
2773        }
2774        return rc;
2775}
2776
2777static inline
2778size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2779{
2780        size_t num_pages;
2781        size_t clen;
2782
2783        clen = min_t(const size_t, len, wsize);
2784        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2785
2786        if (cur_len)
2787                *cur_len = clen;
2788
2789        return num_pages;
2790}
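/*
 * Example (illustrative numbers): with wsize = 65536 and len = 200000,
 * clen = min(200000, 65536) = 65536 and
 * num_pages = DIV_ROUND_UP(65536, 4096) = 16 on 4K-page systems.
 */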
2791
2792static void
2793cifs_uncached_writedata_release(struct kref *refcount)
2794{
2795        int i;
2796        struct cifs_writedata *wdata = container_of(refcount,
2797                                        struct cifs_writedata, refcount);
2798
2799        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2800        for (i = 0; i < wdata->nr_pages; i++)
2801                put_page(wdata->pages[i]);
2802        cifs_writedata_release(refcount);
2803}
2804
2805static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2806
2807static void
2808cifs_uncached_writev_complete(struct work_struct *work)
2809{
2810        struct cifs_writedata *wdata = container_of(work,
2811                                        struct cifs_writedata, work);
2812        struct inode *inode = d_inode(wdata->cfile->dentry);
2813        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2814
2815        spin_lock(&inode->i_lock);
2816        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2817        if (cifsi->server_eof > inode->i_size)
2818                i_size_write(inode, cifsi->server_eof);
2819        spin_unlock(&inode->i_lock);
2820
2821        complete(&wdata->done);
2822        collect_uncached_write_data(wdata->ctx);
2823        /* the below call can possibly free the last ref to aio ctx */
2824        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2825}
2826
2827static int
2828wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2829                      size_t *len, unsigned long *num_pages)
2830{
2831        size_t save_len, copied, bytes, cur_len = *len;
2832        unsigned long i, nr_pages = *num_pages;
2833
2834        save_len = cur_len;
2835        for (i = 0; i < nr_pages; i++) {
2836                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2837                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2838                cur_len -= copied;
2839                /*
2840                 * If we didn't copy as much as we expected, then that
2841                 * may mean we trod into an unmapped area. Stop copying
2842                 * at that point. On the next pass through the big
2843                 * loop, we'll likely end up getting a zero-length
2844                 * write and bailing out of it.
2845                 */
2846                if (copied < bytes)
2847                        break;
2848        }
2849        cur_len = save_len - cur_len;
2850        *len = cur_len;
2851
2852        /*
2853         * If we have no data to send, then that probably means that
2854         * the copy above failed altogether. That's most likely because
2855         * the address in the iovec was bogus. Return -EFAULT and let
2856         * the caller free anything we allocated and bail out.
2857         */
2858        if (!cur_len)
2859                return -EFAULT;
2860
2861        /*
2862         * i + 1 now represents the number of pages we actually used in
2863         * the copy phase above.
2864         */
2865        *num_pages = i + 1;
2866        return 0;
2867}
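
    /*
     * Worked example for wdata_fill_from_iovec(), assuming 4KiB pages:
     * with nr_pages = 3 and *len = 9000, page 0 copies 4096 bytes; if
     * page 1 then copies only 2000 of the requested 4096 (a short copy,
     * e.g. an unmapped user address), the loop stops, *len becomes
     * 4096 + 2000 = 6096 and *num_pages becomes 2.
     */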
2868
2869static int
2870cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2871        struct cifs_aio_ctx *ctx)
2872{
2873        unsigned int wsize;
2874        struct cifs_credits credits;
2875        int rc;
2876        struct TCP_Server_Info *server = wdata->server;
2877
2878        do {
2879                if (wdata->cfile->invalidHandle) {
2880                        rc = cifs_reopen_file(wdata->cfile, false);
2881                        if (rc == -EAGAIN)
2882                                continue;
2883                        else if (rc)
2884                                break;
2885                }
2886
2888                /*
2889                 * Wait for credits to resend this wdata.
2890                 * Note: we attempt to resend the whole wdata rather
2891                 * than in segments.
2892                 */
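                    /*
                     * E.g. if wdata->bytes is 1MiB but only 64KiB worth of
                     * credits is available, release the credits, sleep a
                     * second and retry until the full amount can be
                     * reserved.
                     */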
2893                do {
2894                        rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2895                                                &wsize, &credits);
2896                        if (rc)
2897                                goto fail;
2898
2899                        if (wsize < wdata->bytes) {
2900                                add_credits_and_wake_if(server, &credits, 0);
2901                                msleep(1000);
2902                        }
2903                } while (wsize < wdata->bytes);
2904                wdata->credits = credits;
2905
2906                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2907
2908                if (!rc) {
2909                        if (wdata->cfile->invalidHandle)
2910                                rc = -EAGAIN;
2911                        else {
2912#ifdef CONFIG_CIFS_SMB_DIRECT
2913                                if (wdata->mr) {
2914                                        wdata->mr->need_invalidate = true;
2915                                        smbd_deregister_mr(wdata->mr);
2916                                        wdata->mr = NULL;
2917                                }
2918#endif
2919                                rc = server->ops->async_writev(wdata,
2920                                        cifs_uncached_writedata_release);
2921                        }
2922                }
2923
2924                /* If the write was successfully sent, we are done */
2925                if (!rc) {
2926                        list_add_tail(&wdata->list, wdata_list);
2927                        return 0;
2928                }
2929
2930                /* Roll back credits and retry if needed */
2931                add_credits_and_wake_if(server, &wdata->credits, 0);
2932        } while (rc == -EAGAIN);
2933
2934fail:
2935        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2936        return rc;
2937}
2938
2939static int
2940cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2941                     struct cifsFileInfo *open_file,
2942                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2943                     struct cifs_aio_ctx *ctx)
2944{
2945        int rc = 0;
2946        size_t cur_len;
2947        unsigned long nr_pages, num_pages, i;
2948        struct cifs_writedata *wdata;
2949        struct iov_iter saved_from = *from;
2950        loff_t saved_offset = offset;
2951        pid_t pid;
2952        struct TCP_Server_Info *server;
2953        struct page **pagevec;
2954        size_t start;
2955        unsigned int xid;
2956
2957        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2958                pid = open_file->pid;
2959        else
2960                pid = current->tgid;
2961
2962        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2963        xid = get_xid();
2964
2965        do {
2966                unsigned int wsize;
2967                struct cifs_credits credits_on_stack;
2968                struct cifs_credits *credits = &credits_on_stack;
2969
2970                if (open_file->invalidHandle) {
2971                        rc = cifs_reopen_file(open_file, false);
2972                        if (rc == -EAGAIN)
2973                                continue;
2974                        else if (rc)
2975                                break;
2976                }
2977
2978                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2979                                                   &wsize, credits);
2980                if (rc)
2981                        break;
2982
2983                cur_len = min_t(const size_t, len, wsize);
2984
2985                if (ctx->direct_io) {
2986                        ssize_t result;
2987
2988                        result = iov_iter_get_pages_alloc(
2989                                from, &pagevec, cur_len, &start);
2990                        if (result < 0) {
2991                                cifs_dbg(VFS,
2992                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2993                                         result, iov_iter_type(from),
2994                                         from->iov_offset, from->count);
2995                                dump_stack();
2996
2997                                rc = result;
2998                                add_credits_and_wake_if(server, credits, 0);
2999                                break;
3000                        }
3001                        cur_len = (size_t)result;
3002                        iov_iter_advance(from, cur_len);
3003
3004                        nr_pages =
3005                                (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3006
3007                        wdata = cifs_writedata_direct_alloc(pagevec,
3008                                             cifs_uncached_writev_complete);
3009                        if (!wdata) {
3010                                rc = -ENOMEM;
3011                                add_credits_and_wake_if(server, credits, 0);
3012                                break;
3013                        }
3014
3016                        wdata->page_offset = start;
3017                        wdata->tailsz =
3018                                nr_pages > 1 ?
3019                                        cur_len - (PAGE_SIZE - start) -
3020                                        (nr_pages - 2) * PAGE_SIZE :
3021                                        cur_len;
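                            /*
                             * Worked example, assuming 4KiB pages: with
                             * start = 512 and cur_len = 10000, nr_pages = 3;
                             * the first page holds 4096 - 512 = 3584 bytes,
                             * the middle page 4096, and tailsz =
                             * 10000 - 3584 - 4096 = 2320.
                             */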
3022                } else {
3023                        nr_pages = get_numpages(wsize, len, &cur_len);
3024                        wdata = cifs_writedata_alloc(nr_pages,
3025                                             cifs_uncached_writev_complete);
3026                        if (!wdata) {
3027                                rc = -ENOMEM;
3028                                add_credits_and_wake_if(server, credits, 0);
3029                                break;
3030                        }
3031
3032                        rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3033                        if (rc) {
3034                                kvfree(wdata->pages);
3035                                kfree(wdata);
3036                                add_credits_and_wake_if(server, credits, 0);
3037                                break;
3038                        }
3039
3040                        num_pages = nr_pages;
3041                        rc = wdata_fill_from_iovec(
3042                                wdata, from, &cur_len, &num_pages);
3043                        if (rc) {
3044                                for (i = 0; i < nr_pages; i++)
3045                                        put_page(wdata->pages[i]);
3046                                kvfree(wdata->pages);
3047                                kfree(wdata);
3048                                add_credits_and_wake_if(server, credits, 0);
3049                                break;
3050                        }
3051
3052                        /*
3053                         * Bring nr_pages down to the number of pages we
3054                         * actually used, and free any pages that we didn't use.
3055                         */
3056                        for ( ; nr_pages > num_pages; nr_pages--)
3057                                put_page(wdata->pages[nr_pages - 1]);
3058
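                            /*
                             * The data starts at offset 0 of the first page
                             * here, so the tail is simply what remains of
                             * cur_len after the preceding full pages.
                             */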
3059                        wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3060                }
3061
3062                wdata->sync_mode = WB_SYNC_ALL;
3063                wdata->nr_pages = nr_pages;
3064                wdata->offset = (__u64)offset;
3065                wdata->cfile = cifsFileInfo_get(open_file);
3066                wdata->server = server;
3067                wdata->pid = pid;
3068                wdata->bytes = cur_len;
3069                wdata->pagesz = PAGE_SIZE;
3070                wdata->credits = credits_on_stack;
3071                wdata->ctx = ctx;
3072                kref_get(&ctx->refcount);
3073
3074                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3075
3076                if (!rc) {
3077                        if (wdata->cfile->invalidHandle)
3078                                rc = -EAGAIN;
3079                        else
3080                                rc = server->ops->async_writev(wdata,
3081                                        cifs_uncached_writedata_release);
3082                }
3083
3084                if (rc) {
3085                        add_credits_and_wake_if(server, &wdata->credits, 0);
3086                        kref_put(&wdata->refcount,
3087                                 cifs_uncached_writedata_release);
3088                        if (rc == -EAGAIN) {
3089                                *from = saved_from;
3090                                iov_iter_advance(from, offset - saved_offset);
3091                                continue;
3092                        }
3093                        break;
3094                }
3095
3096                list_add_tail(&wdata->list, wdata_list);
3097                offset += cur_len;
3098                len -= cur_len;
3099        } while (len > 0);
3100
3101        free_xid(xid);
3102        return rc;
3103}
3104
3105static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3106{
3107        struct cifs_writedata *wdata, *tmp;
3108        struct cifs_tcon *tcon;
3109        struct cifs_sb_info *cifs_sb;
3110        struct dentry *dentry = ctx->cfile->dentry;
3111        int rc;
3112
3113        tcon = tlink_tcon(ctx->cfile->tlink);
3114        cifs_sb = CIFS_SB(dentry->d_sb);
3115
3116        mutex_lock(&ctx->aio_mutex);
3117
3118        if (list_empty(&ctx->list)) {
3119                mutex_unlock(&ctx->aio_mutex);
3120                return;
3121        }
3122
3123        rc = ctx->rc;
3124        /*
3125         * Wait for and collect replies for any successful sends in order of
3126         * increasing offset. Once an error is hit, then return without waiting
3127         * for any more replies.
3128         */
3129restart_loop:
3130        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3131                if (!rc) {
3132                        if (!try_wait_for_completion(&wdata->done)) {
3133                                mutex_unlock(&ctx->aio_mutex);
3134                                return;
3135                        }
3136
3137                        if (wdata->result)
3138                                rc = wdata->result;
3139                        else
3140                                ctx->total_len += wdata->bytes;
3141
3142                        /* resend call if it's a retryable error */
3143                        if (rc == -EAGAIN) {
3144                                struct list_head tmp_list;
3145                                struct iov_iter tmp_from = ctx->iter;
3146
3147                                INIT_LIST_HEAD(&tmp_list);
3148                                list_del_init(&wdata->list);
3149
3150                                if (ctx->direct_io)
3151                                        rc = cifs_resend_wdata(
3152                                                wdata, &tmp_list, ctx);
3153                                else {
3154                                        iov_iter_advance(&tmp_from,
3155                                                 wdata->offset - ctx->pos);
3156
3157                                        rc = cifs_write_from_iter(wdata->offset,
3158                                                wdata->bytes, &tmp_from,
3159                                                ctx->cfile, cifs_sb, &tmp_list,
3160                                                ctx);
3161
3162                                        kref_put(&wdata->refcount,
3163                                                cifs_uncached_writedata_release);
3164                                }
3165
3166                                list_splice(&tmp_list, &ctx->list);
3167                                goto restart_loop;
3168                        }
3169                }
3170                list_del_init(&wdata->list);
3171                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3172        }
3173
3174        cifs_stats_bytes_written(tcon, ctx->total_len);
3175        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3176
3177        ctx->rc = (rc == 0) ? ctx->total_len : rc;
3178
3179        mutex_unlock(&ctx->aio_mutex);
3180
3181        if (ctx->iocb && ctx->iocb->ki_complete)
3182                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3183        else
3184                complete(&ctx->done);
3185}
3186
3187static ssize_t __cifs_writev(
3188        struct kiocb *iocb, struct iov_iter *from, bool direct)
3189{
3190        struct file *file = iocb->ki_filp;
3191        ssize_t total_written = 0;
3192        struct cifsFileInfo *cfile;
3193        struct cifs_tcon *tcon;
3194        struct cifs_sb_info *cifs_sb;
3195        struct cifs_aio_ctx *ctx;
3196        struct iov_iter saved_from = *from;
3197        size_t len = iov_iter_count(from);
3198        int rc;
3199
3200        /*
3201         * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3202         * In this case, fall back to the non-direct write function.
3203         * This could be improved by getting pages directly for ITER_KVEC.
3204         */
3205        if (direct && iov_iter_is_kvec(from)) {
3206                cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3207                direct = false;
3208        }
3209
3210        rc = generic_write_checks(iocb, from);
3211        if (rc <= 0)
3212                return rc;
3213
3214        cifs_sb = CIFS_FILE_SB(file);
3215        cfile = file->private_data;
3216        tcon = tlink_tcon(cfile->tlink);
3217
3218        if (!tcon->ses->server->ops->async_writev)
3219                return -ENOSYS;
3220
3221        ctx = cifs_aio_ctx_alloc();
3222        if (!ctx)
3223                return -ENOMEM;
3224
3225        ctx->cfile = cifsFileInfo_get(cfile);
3226
3227        if (!is_sync_kiocb(iocb))
3228                ctx->iocb = iocb;
3229
3230        ctx->pos = iocb->ki_pos;
3231
3232        if (direct) {
3233                ctx->direct_io = true;
3234                ctx->iter = *from;
3235                ctx->len = len;
3236        } else {
3237                rc = setup_aio_ctx_iter(ctx, from, WRITE);
3238                if (rc) {
3239                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3240                        return rc;
3241                }
3242        }
3243
3244        /* grab a lock here because the write response handlers can access ctx */
3245        mutex_lock(&ctx->aio_mutex);
3246
3247        rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3248                                  cfile, cifs_sb, &ctx->list, ctx);
3249
3250        /*
3251         * If at least one write was successfully sent, then discard any rc
3252         * value from the later writes. If the remaining writes succeed, we'll
3253         * end up returning whatever was written; if one of them fails, we'll
3254         * get a new rc value from that.
3255         */
3256        if (!list_empty(&ctx->list))
3257                rc = 0;
3258
3259        mutex_unlock(&ctx->aio_mutex);
3260
3261        if (rc) {
3262                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3263                return rc;
3264        }
3265
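            /*
             * For an async kiocb the outstanding wdata references keep the
             * ctx alive; collect_uncached_write_data() will invoke
             * ->ki_complete() once the last write finishes, so our
             * reference can be dropped now.
             */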
3266        if (!is_sync_kiocb(iocb)) {
3267                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268                return -EIOCBQUEUED;
3269        }
3270
3271        rc = wait_for_completion_killable(&ctx->done);
3272        if (rc) {
3273                mutex_lock(&ctx->aio_mutex);
3274                ctx->rc = rc = -EINTR;
3275                total_written = ctx->total_len;
3276                mutex_unlock(&ctx->aio_mutex);
3277        } else {
3278                rc = ctx->rc;
3279                total_written = ctx->total_len;
3280        }
3281
3282        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3283
3284        if (unlikely(!total_written))
3285                return rc;
3286
3287        iocb->ki_pos += total_written;
3288        return total_written;
3289}
3290
3291ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3292{
3293        return __cifs_writev(iocb, from, true);
3294}
3295
3296ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3297{
3298        return __cifs_writev(iocb, from, false);
3299}
3300
3301static ssize_t
3302cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3303{
3304        struct file *file = iocb->ki_filp;
3305        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3306        struct inode *inode = file->f_mapping->host;
3307        struct cifsInodeInfo *cinode = CIFS_I(inode);
3308        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3309        ssize_t rc;
3310
3311        inode_lock(inode);
3312        /*
3313         * We need to hold the sem to be sure nobody modifies lock list
3314         * with a brlock that prevents writing.
3315         */
3316        down_read(&cinode->lock_sem);
3317
3318        rc = generic_write_checks(iocb, from);
3319        if (rc <= 0)
3320                goto out;
3321
3322        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3323                                     server->vals->exclusive_lock_type, 0,
3324                                     NULL, CIFS_WRITE_OP))
3325                rc = __generic_file_write_iter(iocb, from);
3326        else
3327                rc = -EACCES;
3328out:
3329        up_read(&cinode->lock_sem);
3330        inode_unlock(inode);
3331
3332        if (rc > 0)
3333                rc = generic_write_sync(iocb, rc);
3334        return rc;
3335}
3336
3337ssize_t
3338cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3339{
3340        struct inode *inode = file_inode(iocb->ki_filp);
3341        struct cifsInodeInfo *cinode = CIFS_I(inode);
3342        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3343        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3344                                                iocb->ki_filp->private_data;
3345        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3346        ssize_t written;
3347
3348        written = cifs_get_writer(cinode);
3349        if (written)
3350                return written;
3351
3352        if (CIFS_CACHE_WRITE(cinode)) {
3353                if (cap_unix(tcon->ses) &&
3354                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3355                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3356                        written = generic_file_write_iter(iocb, from);
3357                        goto out;
3358                }
3359                written = cifs_writev(iocb, from);
3360                goto out;
3361        }
3362        /*
3363         * For non-oplocked files in strict cache mode we need to write the data
3364         * to the server exactly from pos to pos+len-1 rather than flush all
3365         * affected pages, because flushing may cause an error with mandatory
3366         * locks on these pages but not on the region from pos to pos+len-1.
3367         */
3368        written = cifs_user_writev(iocb, from);
3369        if (CIFS_CACHE_READ(cinode)) {
3370                /*
3371                 * We have read level caching and we have just sent a write
3372                 * request to the server thus making data in the cache stale.
3373                 * Zap the cache and set oplock/lease level to NONE to avoid
3374                 * reading stale data from the cache. All subsequent read
3375                 * operations will read new data from the server.
3376                 */
3377                cifs_zap_mapping(inode);
3378                cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3379                         inode);
3380                cinode->oplock = 0;
3381        }
3382out:
3383        cifs_put_writer(cinode);
3384        return written;
3385}
3386
3387static struct cifs_readdata *
3388cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3389{
3390        struct cifs_readdata *rdata;
3391
3392        rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3393        if (rdata != NULL) {
3394                rdata->pages = pages;
3395                kref_init(&rdata->refcount);
3396                INIT_LIST_HEAD(&rdata->list);
3397                init_completion(&rdata->done);
3398                INIT_WORK(&rdata->work, complete);
3399        }
3400
3401        return rdata;
3402}
3403
3404static struct cifs_readdata *
3405cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3406{
3407        struct page **pages =
3408                kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3409        struct cifs_readdata *ret = NULL;
3410
3411        if (pages) {
3412                ret = cifs_readdata_direct_alloc(pages, complete);
3413                if (!ret)
3414                        kfree(pages);
3415        }
3416
3417        return ret;
3418}
3419
3420void
3421cifs_readdata_release(struct kref *refcount)
3422{
3423        struct cifs_readdata *rdata = container_of(refcount,
3424                                        struct cifs_readdata, refcount);
3425#ifdef CONFIG_CIFS_SMB_DIRECT
3426        if (rdata->mr) {
3427                smbd_deregister_mr(rdata->mr);
3428                rdata->mr = NULL;
3429        }
3430#endif
3431        if (rdata->cfile)
3432                cifsFileInfo_put(rdata->cfile);
3433
3434        kvfree(rdata->pages);
3435        kfree(rdata);
3436}
3437
3438static int
3439cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3440{
3441        int rc = 0;
3442        struct page *page;
3443        unsigned int i;
3444
3445        for (i = 0; i < nr_pages; i++) {
3446                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3447                if (!page) {
3448                        rc = -ENOMEM;
3449                        break;
3450                }
3451                rdata->pages[i] = page;
3452        }
3453
3454        if (rc) {
3455                unsigned int nr_page_failed = i;
3456
3457                for (i = 0; i < nr_page_failed; i++) {
3458                        put_page(rdata->pages[i]);
3459                        rdata->pages[i] = NULL;
3460                }
3461        }
3462        return rc;
3463}
3464
3465static void
3466cifs_uncached_readdata_release(struct kref *refcount)
3467{
3468        struct cifs_readdata *rdata = container_of(refcount,
3469                                        struct cifs_readdata, refcount);
3470        unsigned int i;
3471
3472        kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3473        for (i = 0; i < rdata->nr_pages; i++) {
3474                put_page(rdata->pages[i]);
3475        }
3476        cifs_readdata_release(refcount);
3477}
3478
3479/**
3480 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3481 * @rdata:      the readdata response with list of pages holding data
3482 * @iter:       destination for our data
3483 *
3484 * This function copies data from a list of pages in a readdata response into
3485 * the iov_iter passed in. It will first calculate where the data should go
3486 * based on the info in the readdata and then copy the data into that spot.
3487 */
3488static int
3489cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3490{
3491        size_t remaining = rdata->got_bytes;
3492        unsigned int i;
3493
3494        for (i = 0; i < rdata->nr_pages; i++) {
3495                struct page *page = rdata->pages[i];
3496                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3497                size_t written;
3498
3499                if (unlikely(iov_iter_is_pipe(iter))) {
3500                        void *addr = kmap_atomic(page);
3501
3502                        written = copy_to_iter(addr, copy, iter);
3503                        kunmap_atomic(addr);
3504                } else
3505                        written = copy_page_to_iter(page, 0, copy, iter);
3506                remaining -= written;
3507                if (written < copy && iov_iter_count(iter) > 0)
3508                        break;
3509        }
3510        return remaining ? -EFAULT : 0;
3511}
3512
3513static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3514
3515static void
3516cifs_uncached_readv_complete(struct work_struct *work)
3517{
3518        struct cifs_readdata *rdata = container_of(work,
3519                                                struct cifs_readdata, work);
3520
3521        complete(&rdata->done);
3522        collect_uncached_read_data(rdata->ctx);
3523        /* the call below may free the last reference to the aio ctx */
3524        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3525}
3526
3527static int
3528uncached_fill_pages(struct TCP_Server_Info *server,
3529                    struct cifs_readdata *rdata, struct iov_iter *iter,
3530                    unsigned int len)
3531{
3532        int result = 0;
3533        unsigned int i;
3534        unsigned int nr_pages = rdata->nr_pages;
3535        unsigned int page_offset = rdata->page_offset;
3536
3537        rdata->got_bytes = 0;
3538        rdata->tailsz = PAGE_SIZE;
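            /*
             * Walk the pages, consuming up to pagesz bytes per page (less
             * any initial page_offset on the first page). E.g. with three
             * 4KiB pages, page_offset = 512 and len = 6000: page 0 takes
             * 3584 bytes, page 1 the remaining 2416 (the tail), and page 2
             * is released since no data is left for it.
             */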
3539        for (i = 0; i < nr_pages; i++) {
3540                struct page *page = rdata->pages[i];
3541                size_t n;
3542                unsigned int segment_size = rdata->pagesz;
3543
3544                if (i == 0)
3545                        segment_size -= page_offset;
3546                else
3547                        page_offset = 0;
3548
3550                if (len <= 0) {
3551                        /* no need to hold page hostage */
3552                        rdata->pages[i] = NULL;
3553                        rdata->nr_pages--;
3554                        put_page(page);
3555                        continue;
3556                }
3557
3558                n = len;
3559                if (len >= segment_size)
3560                        /* enough data to fill the page */
3561                        n = segment_size;
3562                else
3563                        rdata->tailsz = len;
3564                len -= n;
3565
3566                if (iter)
3567                        result = copy_page_from_iter(
3568                                        page, page_offset, n, iter);
3569#ifdef CONFIG_CIFS_SMB_DIRECT
3570                else if (rdata->mr)
3571                        result = n;
3572#endif
3573                else
3574                        result = cifs_read_page_from_socket(
3575                                        server, page, page_offset, n);
3576                if (result < 0)
3577                        break;
3578
3579                rdata->got_bytes += result;
3580        }
3581
3582        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3583                                                rdata->got_bytes : result;
3584}
3585
3586static int
3587cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3588                              struct cifs_readdata *rdata, unsigned int len)
3589{
3590        return uncached_fill_pages(server, rdata, NULL, len);
3591}
3592
3593static int
3594cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3595                              struct cifs_readdata *rdata,
3596                              struct iov_iter *iter)
3597{
3598        return uncached_fill_pages(server, rdata, iter, iter->count);
3599}
3600
3601static int cifs_resend_rdata(struct cifs_readdata *rdata,
3602                        struct list_head *rdata_list,
3603                        struct cifs_aio_ctx *ctx)
3604{
3605        unsigned int rsize;
3606        struct cifs_credits credits;
3607        int rc;
3608        struct TCP_Server_Info *server;
3609
3610        /* XXX: should we pick a new channel here? */
3611        server = rdata->server;
3612
3613        do {
3614                if (rdata->cfile->invalidHandle) {
3615                        rc = cifs_reopen_file(rdata->cfile, true);
3616                        if (rc == -EAGAIN)
3617                                continue;
3618                        else if (rc)
3619                                break;
3620                }
3621
3622                /*
3623                 * Wait for credits to resend this rdata.
3624                 * Note: we attempt to resend the whole rdata rather
3625                 * than in segments.
3626                 */
3627                do {
3628                        rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3629                                                &rsize, &credits);
3630
3631                        if (rc)
3632                                goto fail;
3633
3634                        if (rsize < rdata->bytes) {
3635                                add_credits_and_wake_if(server, &credits, 0);
3636                                msleep(1000);
3637                        }
3638                } while (rsize < rdata->bytes);
3639                rdata->credits = credits;
3640
3641                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3642                if (!rc) {
3643                        if (rdata->cfile->invalidHandle)
3644                                rc = -EAGAIN;
3645                        else {
3646#ifdef CONFIG_CIFS_SMB_DIRECT
3647                                if (rdata->mr) {
3648                                        rdata->mr->need_invalidate = true;
3649                                        smbd_deregister_mr(rdata->mr);
3650                                        rdata->mr = NULL;
3651                                }
3652#endif
3653                                rc = server->ops->async_readv(rdata);
3654                        }
3655                }
3656
3657                /* If the read was successfully sent, we are done */
3658                if (!rc) {
3659                        /* Add to aio pending list */
3660                        list_add_tail(&rdata->list, rdata_list);
3661                        return 0;
3662                }
3663
3664                /* Roll back credits and retry if needed */
3665                add_credits_and_wake_if(server, &rdata->credits, 0);
3666        } while (rc == -EAGAIN);
3667
3668fail:
3669        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3670        return rc;
3671}
3672
3673static int
3674cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3675                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3676                     struct cifs_aio_ctx *ctx)
3677{
3678        struct cifs_readdata *rdata;
3679        unsigned int npages, rsize;
3680        struct cifs_credits credits_on_stack;
3681        struct cifs_credits *credits = &credits_on_stack;
3682        size_t cur_len;
3683        int rc;
3684        pid_t pid;
3685        struct TCP_Server_Info *server;
3686        struct page **pagevec;
3687        size_t start;
3688        struct iov_iter direct_iov = ctx->iter;
3689
3690        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3691
3692        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3693                pid = open_file->pid;
3694        else
3695                pid = current->tgid;
3696
3697        if (ctx->direct_io)
3698                iov_iter_advance(&direct_iov, offset - ctx->pos);
3699
3700        do {
3701                if (open_file->invalidHandle) {
3702                        rc = cifs_reopen_file(open_file, true);
3703                        if (rc == -EAGAIN)
3704                                continue;
3705                        else if (rc)
3706                                break;
3707                }
3708
3709                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3710                                                   &rsize, credits);
3711                if (rc)
3712                        break;
3713
3714                cur_len = min_t(const size_t, len, rsize);
3715
3716                if (ctx->direct_io) {
3717                        ssize_t result;
3718
3719                        result = iov_iter_get_pages_alloc(
3720                                        &direct_iov, &pagevec,
3721                                        cur_len, &start);
3722                        if (result < 0) {
3723                                cifs_dbg(VFS,
3724                                         "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3725                                         result, iov_iter_type(&direct_iov),
3726                                         direct_iov.iov_offset,
3727                                         direct_iov.count);
3728                                dump_stack();
3729
3730                                rc = result;
3731                                add_credits_and_wake_if(server, credits, 0);
3732                                break;
3733                        }
3734                        cur_len = (size_t)result;
3735                        iov_iter_advance(&direct_iov, cur_len);
3736
3737                        rdata = cifs_readdata_direct_alloc(
3738                                        pagevec, cifs_uncached_readv_complete);
3739                        if (!rdata) {
3740                                add_credits_and_wake_if(server, credits, 0);
3741                                rc = -ENOMEM;
3742                                break;
3743                        }
3744
3745                        npages = (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3746                        rdata->page_offset = start;
3747                        rdata->tailsz = npages > 1 ?
3748                                cur_len - (PAGE_SIZE - start) -
3749                                (npages - 2) * PAGE_SIZE : cur_len;
3750
3751                } else {
3753                        npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3754                        /* allocate a readdata struct */
3755                        rdata = cifs_readdata_alloc(npages,
3756                                            cifs_uncached_readv_complete);
3757                        if (!rdata) {
3758                                add_credits_and_wake_if(server, credits, 0);
3759                                rc = -ENOMEM;
3760                                break;
3761                        }
3762
3763                        rc = cifs_read_allocate_pages(rdata, npages);
3764                        if (rc) {
3765                                kvfree(rdata->pages);
3766                                kfree(rdata);
3767                                add_credits_and_wake_if(server, credits, 0);
3768                                break;
3769                        }
3770
3771                        rdata->tailsz = PAGE_SIZE;
3772                }
3773
3774                rdata->server = server;
3775                rdata->cfile = cifsFileInfo_get(open_file);
3776                rdata->nr_pages = npages;
3777                rdata->offset = offset;
3778                rdata->bytes = cur_len;
3779                rdata->pid = pid;
3780                rdata->pagesz = PAGE_SIZE;
3781                rdata->read_into_pages = cifs_uncached_read_into_pages;
3782                rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3783                rdata->credits = credits_on_stack;
3784                rdata->ctx = ctx;
3785                kref_get(&ctx->refcount);
3786
3787                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3788
3789                if (!rc) {
3790                        if (rdata->cfile->invalidHandle)
3791                                rc = -EAGAIN;
3792                        else
3793                                rc = server->ops->async_readv(rdata);
3794                }
3795
3796                if (rc) {
3797                        add_credits_and_wake_if(server, &rdata->credits, 0);
3798                        kref_put(&rdata->refcount,
3799                                cifs_uncached_readdata_release);
3800                        if (rc == -EAGAIN) {
3801                                iov_iter_revert(&direct_iov, cur_len);
3802                                continue;
3803                        }
3804                        break;
3805                }
3806
3807                list_add_tail(&rdata->list, rdata_list);
3808                offset += cur_len;
3809                len -= cur_len;
3810        } while (len > 0);
3811
3812        return rc;
3813}
3814
3815static void
3816collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3817{
3818        struct cifs_readdata *rdata, *tmp;
3819        struct iov_iter *to = &ctx->iter;
3820        struct cifs_sb_info *cifs_sb;
3821        int rc;
3822
3823        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3824
3825        mutex_lock(&ctx->aio_mutex);
3826
3827        if (list_empty(&ctx->list)) {
3828                mutex_unlock(&ctx->aio_mutex);
3829                return;
3830        }
3831
3832        rc = ctx->rc;
3833        /* the loop below should proceed in the order of increasing offsets */
3834again:
3835        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3836                if (!rc) {
3837                        if (!try_wait_for_completion(&rdata->done)) {
3838                                mutex_unlock(&ctx->aio_mutex);
3839                                return;
3840                        }
3841
3842                        if (rdata->result == -EAGAIN) {
3843                                /* resend call if it's a retryable error */
3844                                struct list_head tmp_list;
3845                                unsigned int got_bytes = rdata->got_bytes;
3846
3847                                list_del_init(&rdata->list);
3848                                INIT_LIST_HEAD(&tmp_list);
3849
3850                                /*
3851                                 * Got a part of data and then reconnect has
3852                                 * happened -- fill the buffer and continue
3853                                 * reading.
3854                                 */
3855                                if (got_bytes && got_bytes < rdata->bytes) {
3856                                        rc = 0;
3857                                        if (!ctx->direct_io)
3858                                                rc = cifs_readdata_to_iov(rdata, to);
3859                                        if (rc) {
3860                                                kref_put(&rdata->refcount,
3861                                                        cifs_uncached_readdata_release);
3862                                                continue;
3863                                        }
3864                                }
3865
3866                                if (ctx->direct_io) {
3867                                        /*
3868                                         * Re-use rdata as this is a
3869                                         * direct I/O
3870                                         */
3871                                        rc = cifs_resend_rdata(
3872                                                rdata,
3873                                                &tmp_list, ctx);
3874                                } else {
3875                                        rc = cifs_send_async_read(
3876                                                rdata->offset + got_bytes,
3877                                                rdata->bytes - got_bytes,
3878                                                rdata->cfile, cifs_sb,
3879                                                &tmp_list, ctx);
3880
3881                                        kref_put(&rdata->refcount,
3882                                                cifs_uncached_readdata_release);
3883                                }
3884
3885                                list_splice(&tmp_list, &ctx->list);
3886
3887                                goto again;
3888                        } else if (rdata->result)
3889                                rc = rdata->result;
3890                        else if (!ctx->direct_io)
3891                                rc = cifs_readdata_to_iov(rdata, to);
3892
3893                        /* if there was a short read -- discard anything left */
3894                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3895                                rc = -ENODATA;
3896
3897                        ctx->total_len += rdata->got_bytes;
3898                }
3899                list_del_init(&rdata->list);
3900                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3901        }
3902
3903        if (!ctx->direct_io)
3904                ctx->total_len = ctx->len - iov_iter_count(to);
3905
3906        /* mask nodata case */
3907        if (rc == -ENODATA)
3908                rc = 0;
3909
3910        ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3911
3912        mutex_unlock(&ctx->aio_mutex);
3913
3914        if (ctx->iocb && ctx->iocb->ki_complete)
3915                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3916        else
3917                complete(&ctx->done);
3918}
3919
3920static ssize_t __cifs_readv(
3921        struct kiocb *iocb, struct iov_iter *to, bool direct)
3922{
3923        size_t len;
3924        struct file *file = iocb->ki_filp;
3925        struct cifs_sb_info *cifs_sb;
3926        struct cifsFileInfo *cfile;
3927        struct cifs_tcon *tcon;
3928        ssize_t rc, total_read = 0;
3929        loff_t offset = iocb->ki_pos;
3930        struct cifs_aio_ctx *ctx;
3931
3932        /*
3933         * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
3934         * fall back to the data copy read path.
3935         * This could be improved by getting pages directly for ITER_KVEC.
3936         */
3937        if (direct && iov_iter_is_kvec(to)) {
3938                cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3939                direct = false;
3940        }
3941
3942        len = iov_iter_count(to);
3943        if (!len)
3944                return 0;
3945
3946        cifs_sb = CIFS_FILE_SB(file);
3947        cfile = file->private_data;
3948        tcon = tlink_tcon(cfile->tlink);
3949
3950        if (!tcon->ses->server->ops->async_readv)
3951                return -ENOSYS;
3952
3953        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3954                cifs_dbg(FYI, "attempting read on write only file instance\n");
3955
3956        ctx = cifs_aio_ctx_alloc();
3957        if (!ctx)
3958                return -ENOMEM;
3959
3960        ctx->cfile = cifsFileInfo_get(cfile);
3961
3962        if (!is_sync_kiocb(iocb))
3963                ctx->iocb = iocb;
3964
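            /*
             * Pages pinned from a user iovec will be written to by the
             * read, so flag that they must be dirtied when the ctx
             * releases them.
             */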
3965        if (iter_is_iovec(to))
3966                ctx->should_dirty = true;
3967
3968        if (direct) {
3969                ctx->pos = offset;
3970                ctx->direct_io = true;
3971                ctx->iter = *to;
3972                ctx->len = len;
3973        } else {
3974                rc = setup_aio_ctx_iter(ctx, to, READ);
3975                if (rc) {
3976                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3977                        return rc;
3978                }
3979                len = ctx->len;
3980        }
3981
3982        /* grab a lock here because the read response handlers can access ctx */
3983        mutex_lock(&ctx->aio_mutex);
3984
3985        rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3986
3987        /* if at least one read request send succeeded, then reset rc */
3988        if (!list_empty(&ctx->list))
3989                rc = 0;
3990
3991        mutex_unlock(&ctx->aio_mutex);
3992
3993        if (rc) {
3994                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3995                return rc;
3996        }
3997
3998        if (!is_sync_kiocb(iocb)) {
3999                kref_put(&ctx->refcount, cifs_aio_ctx_release);
4000                return -EIOCBQUEUED;
4001        }
4002
4003        rc = wait_for_completion_killable(&ctx->done);
4004        if (rc) {
4005                mutex_lock(&ctx->aio_mutex);
4006                ctx->rc = rc = -EINTR;
4007                total_read = ctx->total_len;
4008                mutex_unlock(&ctx->aio_mutex);
4009        } else {
4010                rc = ctx->rc;
4011                total_read = ctx->total_len;
4012        }
4013
4014        kref_put(&ctx->refcount, cifs_aio_ctx_release);
4015
4016        if (total_read) {
4017                iocb->ki_pos += total_read;
4018                return total_read;
4019        }
4020        return rc;
4021}
4022
4023ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4024{
4025        return __cifs_readv(iocb, to, true);
4026}
4027
4028ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4029{
4030        return __cifs_readv(iocb, to, false);
4031}
4032
4033ssize_t
4034cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4035{
4036        struct inode *inode = file_inode(iocb->ki_filp);
4037        struct cifsInodeInfo *cinode = CIFS_I(inode);
4038        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4039        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4040                                                iocb->ki_filp->private_data;
4041        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4042        int rc = -EACCES;
4043
4044        /*
4045         * In strict cache mode we need to read from the server every time
4046         * if we don't have a level II oplock, because the server can delay
4047         * the mtime change, so we can't decide whether to invalidate the
4048         * inode. We can also fail when reading pages if there are mandatory
4049         * locks on pages affected by this read but not on the region from
4050         * pos to pos+len-1.
4051         */
4052        if (!CIFS_CACHE_READ(cinode))
4053                return cifs_user_readv(iocb, to);
4054
4055        if (cap_unix(tcon->ses) &&
4056            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4057            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4058                return generic_file_read_iter(iocb, to);
4059
4060        /*
4061         * We need to hold the sem to be sure nobody modifies lock list
4062         * with a brlock that prevents reading.
4063         */
4064        down_read(&cinode->lock_sem);
4065        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4066                                     tcon->ses->server->vals->shared_lock_type,
4067                                     0, NULL, CIFS_READ_OP))
4068                rc = generic_file_read_iter(iocb, to);
4069        up_read(&cinode->lock_sem);
4070        return rc;
4071}
4072
4073static ssize_t
4074cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4075{
4076        int rc = -EACCES;
4077        unsigned int bytes_read = 0;
4078        unsigned int total_read;
4079        unsigned int current_read_size;
4080        unsigned int rsize;
4081        struct cifs_sb_info *cifs_sb;
4082        struct cifs_tcon *tcon;
4083        struct TCP_Server_Info *server;
4084        unsigned int xid;
4085        char *cur_offset;
4086        struct cifsFileInfo *open_file;
4087        struct cifs_io_parms io_parms = {0};
4088        int buf_type = CIFS_NO_BUFFER;
4089        __u32 pid;
4090
4091        xid = get_xid();
4092        cifs_sb = CIFS_FILE_SB(file);
4093
4094        /* FIXME: set up handlers for larger reads and/or convert to async */
4095        rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4096
4097        if (file->private_data == NULL) {
4098                rc = -EBADF;
4099                free_xid(xid);
4100                return rc;
4101        }
4102        open_file = file->private_data;
4103        tcon = tlink_tcon(open_file->tlink);
4104        server = cifs_pick_channel(tcon->ses);
4105
4106        if (!server->ops->sync_read) {
4107                free_xid(xid);
4108                return -ENOSYS;
4109        }
4110
4111        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4112                pid = open_file->pid;
4113        else
4114                pid = current->tgid;
4115
4116        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4117                cifs_dbg(FYI, "attempting read on write only file instance\n");
4118
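            /*
             * Issue synchronous reads of at most rsize bytes at a time,
             * retrying on -EAGAIN (e.g. after a reconnect invalidates the
             * handle), until the request is satisfied or the server
             * returns an error or zero bytes.
             */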
4119        for (total_read = 0, cur_offset = read_data; read_size > total_read;
4120             total_read += bytes_read, cur_offset += bytes_read) {
4121                do {
4122                        current_read_size = min_t(uint, read_size - total_read,
4123                                                  rsize);
4124                        /*
4125                         * For Windows ME and 9x we do not want to request
4126                         * more than the server negotiated, since it will
4127                         * refuse the read then.
4128                         */
4129                        if (!(tcon->ses->capabilities &
4130                                tcon->ses->server->vals->cap_large_files)) {
4131                                current_read_size = min_t(uint,
4132                                        current_read_size, CIFSMaxBufSize);
4133                        }
4134                        if (open_file->invalidHandle) {
4135                                rc = cifs_reopen_file(open_file, true);
4136                                if (rc != 0)
4137                                        break;
4138                        }
4139                        io_parms.pid = pid;
4140                        io_parms.tcon = tcon;
4141                        io_parms.offset = *offset;
4142                        io_parms.length = current_read_size;
4143                        io_parms.server = server;
4144                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4145                                                    &bytes_read, &cur_offset,
4146                                                    &buf_type);
4147                } while (rc == -EAGAIN);
4148
4149                if (rc || (bytes_read == 0)) {
4150                        if (total_read) {
4151                                break;
4152                        } else {
4153                                free_xid(xid);
4154                                return rc;
4155                        }
4156                } else {
4157                        cifs_stats_bytes_read(tcon, total_read);
4158                        *offset += bytes_read;
4159                }
4160        }
4161        free_xid(xid);
4162        return total_read;
4163}
4164
4165/*
4166 * If the page is mmap'ed into a process' page tables, then we need to make
4167 * sure that it doesn't change while being written back.
4168 */
4169static vm_fault_t
4170cifs_page_mkwrite(struct vm_fault *vmf)
4171{
4172        struct page *page = vmf->page;
4173
4174        lock_page(page);
4175        return VM_FAULT_LOCKED;
4176}
4177
4178static const struct vm_operations_struct cifs_file_vm_ops = {
4179        .fault = filemap_fault,
4180        .map_pages = filemap_map_pages,
4181        .page_mkwrite = cifs_page_mkwrite,
4182};
4183
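    /*
     * In strict cache mode, if we do not hold a read oplock/lease the
     * pagecache may be stale, so zap it before the file is mapped.
     */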
4184int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4185{
4186        int xid, rc = 0;
4187        struct inode *inode = file_inode(file);
4188
4189        xid = get_xid();
4190
4191        if (!CIFS_CACHE_READ(CIFS_I(inode)))
4192                rc = cifs_zap_mapping(inode);
4193        if (!rc)
4194                rc = generic_file_mmap(file, vma);
4195        if (!rc)
4196                vma->vm_ops = &cifs_file_vm_ops;
4197
4198        free_xid(xid);
4199        return rc;
4200}
4201
4202int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4203{
4204        int rc, xid;
4205
4206        xid = get_xid();
4207
4208        rc = cifs_revalidate_file(file);
4209        if (rc)
4210                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4211                         rc);
4212        if (!rc)
4213                rc = generic_file_mmap(file, vma);
4214        if (!rc)
4215                vma->vm_ops = &cifs_file_vm_ops;
4216
4217        free_xid(xid);
4218        return rc;
4219}
4220
4221static void
4222cifs_readv_complete(struct work_struct *work)
4223{
4224        unsigned int i, got_bytes;
4225        struct cifs_readdata *rdata = container_of(work,
4226                                                struct cifs_readdata, work);
4227
4228        got_bytes = rdata->got_bytes;
4229        for (i = 0; i < rdata->nr_pages; i++) {
4230                struct page *page = rdata->pages[i];
4231
4232                lru_cache_add(page);
4233
4234                if (rdata->result == 0 ||
4235                    (rdata->result == -EAGAIN && got_bytes)) {
4236                        flush_dcache_page(page);
4237                        SetPageUptodate(page);
4238                }
4239
4240                unlock_page(page);
4241
4242                if (rdata->result == 0 ||
4243                    (rdata->result == -EAGAIN && got_bytes))
4244                        cifs_readpage_to_fscache(rdata->mapping->host, page);
4245
4246                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4247
4248                put_page(page);
4249                rdata->pages[i] = NULL;
4250        }
4251        kref_put(&rdata->refcount, cifs_readdata_release);
4252}
4253
4254static int
4255readpages_fill_pages(struct TCP_Server_Info *server,
4256                     struct cifs_readdata *rdata, struct iov_iter *iter,
4257                     unsigned int len)
4258{
4259        int result = 0;
4260        unsigned int i;
4261        u64 eof;
4262        pgoff_t eof_index;
4263        unsigned int nr_pages = rdata->nr_pages;
4264        unsigned int page_offset = rdata->page_offset;
4265
4266        /* determine the eof that the server (probably) has */
4267        eof = CIFS_I(rdata->mapping->host)->server_eof;
4268        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4269        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
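            /*
             * E.g. with eof = 10000 and 4KiB pages, eof_index =
             * 9999 >> PAGE_SHIFT = 2: index 2 is the last page the server
             * is expected to have data for.
             */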
4270
4271        rdata->got_bytes = 0;
4272        rdata->tailsz = PAGE_SIZE;
4273        for (i = 0; i < nr_pages; i++) {
4274                struct page *page = rdata->pages[i];
4275                unsigned int to_read = rdata->pagesz;
4276                size_t n;
4277
4278                if (i == 0)
4279                        to_read -= page_offset;
4280                else
4281                        page_offset = 0;
4282
4283                n = to_read;
4284
4285                if (len >= to_read) {
4286                        len -= to_read;
4287                } else if (len > 0) {
4288                        /* enough for partial page, fill and zero the rest */
4289                        zero_user(page, len + page_offset, to_read - len);
4290                        n = rdata->tailsz = len;
4291                        len = 0;
4292                } else if (page->index > eof_index) {
4293                        /*
4294                         * The VFS will not try to do readahead past the
4295                         * i_size, but it's possible that we have outstanding
4296                         * writes with gaps in the middle and the i_size hasn't
4297                         * caught up yet. Populate those with zeroed out pages
4298                         * to prevent the VFS from repeatedly attempting to
4299                         * fill them until the writes are flushed.
4300                         */
4301                        zero_user(page, 0, PAGE_SIZE);
4302                        lru_cache_add(page);
4303                        flush_dcache_page(page);
4304                        SetPageUptodate(page);
4305                        unlock_page(page);
4306                        put_page(page);
4307                        rdata->pages[i] = NULL;
4308                        rdata->nr_pages--;
4309                        continue;
4310                } else {
4311                        /* no need to hold page hostage */
4312                        lru_cache_add(page);
4313                        unlock_page(page);
4314                        put_page(page);
4315                        rdata->pages[i] = NULL;
4316                        rdata->nr_pages--;
4317                        continue;
4318                }
4319
4320                if (iter)
4321                        result = copy_page_from_iter(
4322                                        page, page_offset, n, iter);
4323#ifdef CONFIG_CIFS_SMB_DIRECT
4324                else if (rdata->mr)
4325                        result = n;
4326#endif
4327                else
4328                        result = cifs_read_page_from_socket(
4329                                        server, page, page_offset, n);
4330                if (result < 0)
4331                        break;
4332
4333                rdata->got_bytes += result;
4334        }
4335
4336        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4337                                                rdata->got_bytes : result;
4338}
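
/*
 * Editor's sketch (illustration only): the eof_index computation above
 * maps a file size to the index of the last page holding valid data,
 * which is why it uses (eof - 1) >> PAGE_SHIFT with a special case for
 * an empty file.  A tiny userspace check of the same arithmetic,
 * assuming 4K pages:
 */
#if 0
#include <assert.h>
#include <stddef.h>

#define EX_PAGE_SHIFT 12                        /* assume 4K pages */

static size_t last_page_index(unsigned long long eof)
{
        return eof ? (size_t)((eof - 1) >> EX_PAGE_SHIFT) : 0;
}

int main(void)
{
        assert(last_page_index(1) == 0);        /* one byte -> page 0 */
        assert(last_page_index(4096) == 0);     /* exactly one page -> page 0 */
        assert(last_page_index(4097) == 1);     /* spills into page 1 */
        return 0;
}
#endif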
4339
4340static int
4341cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4342                               struct cifs_readdata *rdata, unsigned int len)
4343{
4344        return readpages_fill_pages(server, rdata, NULL, len);
4345}
4346
4347static int
4348cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4349                               struct cifs_readdata *rdata,
4350                               struct iov_iter *iter)
4351{
4352        return readpages_fill_pages(server, rdata, iter, iter->count);
4353}
4354
4355static int
4356readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4357                    unsigned int rsize, struct list_head *tmplist,
4358                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4359{
4360        struct page *page, *tpage;
4361        unsigned int expected_index;
4362        int rc;
4363        gfp_t gfp = readahead_gfp_mask(mapping);
4364
4365        INIT_LIST_HEAD(tmplist);
4366
4367        page = lru_to_page(page_list);
4368
4369        /*
4370         * Lock the page and put it in the cache. Since no one else
4371         * should have access to this page, we're safe to simply set
4372         * PG_locked without checking it first.
4373         */
4374        __SetPageLocked(page);
4375        rc = add_to_page_cache_locked(page, mapping,
4376                                      page->index, gfp);
4377
4378        /* give up if we can't stick it in the cache */
4379        if (rc) {
4380                __ClearPageLocked(page);
4381                return rc;
4382        }
4383
4384        /* move first page to the tmplist */
4385        *offset = (loff_t)page->index << PAGE_SHIFT;
4386        *bytes = PAGE_SIZE;
4387        *nr_pages = 1;
4388        list_move_tail(&page->lru, tmplist);
4389
4390        /* now try and add more pages onto the request */
4391        expected_index = page->index + 1;
4392        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4393                /* discontinuity ? */
4394                if (page->index != expected_index)
4395                        break;
4396
4397                /* would this page push the read over the rsize? */
4398                if (*bytes + PAGE_SIZE > rsize)
4399                        break;
4400
4401                __SetPageLocked(page);
4402                rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4403                if (rc) {
4404                        __ClearPageLocked(page);
4405                        break;
4406                }
4407                list_move_tail(&page->lru, tmplist);
4408                (*bytes) += PAGE_SIZE;
4409                expected_index++;
4410                (*nr_pages)++;
4411        }
4412        return rc;
4413}
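
/*
 * Editor's sketch (illustration only): the loop above grows one read
 * request while the page indexes stay contiguous and the running byte
 * count stays within rsize.  The helper below shows the same batching
 * rule over a plain array of indexes; the names are invented for the
 * example.
 */
#if 0
#include <stddef.h>

#define EX_PAGE_SIZE 4096

/* How many leading indexes fit in a single read of at most rsize bytes? */
static size_t batch_pages(const unsigned long *idx, size_t n, size_t rsize)
{
        size_t i, bytes = EX_PAGE_SIZE;

        if (n == 0 || rsize < EX_PAGE_SIZE)
                return 0;
        for (i = 1; i < n; i++) {
                if (idx[i] != idx[i - 1] + 1)           /* discontinuity */
                        break;
                if (bytes + EX_PAGE_SIZE > rsize)       /* over the rsize cap */
                        break;
                bytes += EX_PAGE_SIZE;
        }
        return i;
}
#endif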
4414
4415static int cifs_readpages(struct file *file, struct address_space *mapping,
4416        struct list_head *page_list, unsigned num_pages)
4417{
4418        int rc;
4419        int err = 0;
4420        struct list_head tmplist;
4421        struct cifsFileInfo *open_file = file->private_data;
4422        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4423        struct TCP_Server_Info *server;
4424        pid_t pid;
4425        unsigned int xid;
4426
4427        xid = get_xid();
4428        /*
4429         * Reads as many pages as possible from fscache. Returns -ENOBUFS
4430         * immediately if the cookie is negative.
4431         *
4432         * After this point, every page in the list might have PG_fscache set,
4433         * so we will need to clear that on every page we don't use.
4434         */
4435        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4436                                         &num_pages);
4437        if (rc == 0) {
4438                free_xid(xid);
4439                return rc;
4440        }
4441
4442        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4443                pid = open_file->pid;
4444        else
4445                pid = current->tgid;
4446
4447        rc = 0;
4448        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4449
4450        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4451                 __func__, file, mapping, num_pages);
4452
4453        /*
4454         * Start with the page at end of list and move it to private
4455         * list. Do the same with any following pages until we hit
4456         * the rsize limit, hit an index discontinuity, or run out of
4457         * pages. Issue the async read and then start the loop again
4458         * until the list is empty.
4459         *
4460         * Note that list order is important. The page_list is in
4461         * the order of declining indexes. When we put the pages in
4462         * the rdata->pages, then we want them in increasing order.
4463         */
4464        while (!list_empty(page_list) && !err) {
4465                unsigned int i, nr_pages, bytes, rsize;
4466                loff_t offset;
4467                struct page *page, *tpage;
4468                struct cifs_readdata *rdata;
4469                struct cifs_credits credits_on_stack;
4470                struct cifs_credits *credits = &credits_on_stack;
4471
4472                if (open_file->invalidHandle) {
4473                        rc = cifs_reopen_file(open_file, true);
4474                        if (rc == -EAGAIN)
4475                                continue;
4476                        else if (rc)
4477                                break;
4478                }
4479
4480                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4481                                                   &rsize, credits);
4482                if (rc)
4483                        break;
4484
4485                /*
4486                 * Give up immediately if rsize is too small to read an entire
4487                 * page. The VFS will fall back to readpage. We should never
4488                 * reach this point however since we set ra_pages to 0 when the
4489                 * rsize is smaller than a cache page.
4490                 */
4491                if (unlikely(rsize < PAGE_SIZE)) {
4492                        add_credits_and_wake_if(server, credits, 0);
4493                        free_xid(xid);
4494                        return 0;
4495                }
4496
4497                nr_pages = 0;
4498                err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4499                                         &nr_pages, &offset, &bytes);
4500                if (!nr_pages) {
4501                        add_credits_and_wake_if(server, credits, 0);
4502                        break;
4503                }
4504
4505                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4506                if (!rdata) {
4507                        /* best to give up if we're out of mem */
4508                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4509                                list_del(&page->lru);
4510                                lru_cache_add(page);
4511                                unlock_page(page);
4512                                put_page(page);
4513                        }
4514                        rc = -ENOMEM;
4515                        add_credits_and_wake_if(server, credits, 0);
4516                        break;
4517                }
4518
4519                rdata->cfile = cifsFileInfo_get(open_file);
4520                rdata->server = server;
4521                rdata->mapping = mapping;
4522                rdata->offset = offset;
4523                rdata->bytes = bytes;
4524                rdata->pid = pid;
4525                rdata->pagesz = PAGE_SIZE;
4526                rdata->tailsz = PAGE_SIZE;
4527                rdata->read_into_pages = cifs_readpages_read_into_pages;
4528                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4529                rdata->credits = credits_on_stack;
4530
4531                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4532                        list_del(&page->lru);
4533                        rdata->pages[rdata->nr_pages++] = page;
4534                }
4535
4536                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4537
4538                if (!rc) {
4539                        if (rdata->cfile->invalidHandle)
4540                                rc = -EAGAIN;
4541                        else
4542                                rc = server->ops->async_readv(rdata);
4543                }
4544
4545                if (rc) {
4546                        add_credits_and_wake_if(server, &rdata->credits, 0);
4547                        for (i = 0; i < rdata->nr_pages; i++) {
4548                                page = rdata->pages[i];
4549                                lru_cache_add(page);
4550                                unlock_page(page);
4551                                put_page(page);
4552                        }
4553                        /* Fallback to the readpage in error/reconnect cases */
4554                        kref_put(&rdata->refcount, cifs_readdata_release);
4555                        break;
4556                }
4557
4558                kref_put(&rdata->refcount, cifs_readdata_release);
4559        }
4560
4561        /* Any pages that have been shown to fscache but didn't get added to
4562         * the pagecache must be uncached before they get returned to the
4563         * allocator.
4564         */
4565        cifs_fscache_readpages_cancel(mapping->host, page_list);
4566        free_xid(xid);
4567        return rc;
4568}
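
/*
 * Editor's sketch (illustration only): cifs_readpages() above follows a
 * common dispatch shape - carve a contiguous batch off the head of the
 * list, issue one async request for it, and loop until the list drains
 * or an error stops the loop so the VFS can fall back to ->readpage.
 * The callback-based helper below is an invented stand-in for that
 * shape, not the kernel API.
 */
#if 0
#include <stddef.h>

static int dispatch_in_batches(size_t n, size_t max,
                               int (*issue)(size_t first, size_t count))
{
        size_t first = 0;

        while (first < n) {
                size_t count = n - first < max ? n - first : max;
                int rc = issue(first, count);   /* one async read per batch */

                if (rc)
                        return rc;      /* leave the rest for ->readpage */
                first += count;
        }
        return 0;
}
#endif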
4569
4570/*
4571 * cifs_readpage_worker must be called with the page pinned
4572 */
4573static int cifs_readpage_worker(struct file *file, struct page *page,
4574        loff_t *poffset)
4575{
4576        char *read_data;
4577        int rc;
4578
4579        /* Is the page cached? */
4580        rc = cifs_readpage_from_fscache(file_inode(file), page);
4581        if (rc == 0)
4582                goto read_complete;
4583
4584        read_data = kmap(page);
4585        /* for reads over a certain size we could initiate async read-ahead */
4586
4587        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4588
4589        if (rc < 0)
4590                goto io_error;
4591        else
4592                cifs_dbg(FYI, "Bytes read %d\n", rc);
4593
4594        /* we do not want atime to be less than mtime, it broke some apps */
4595        file_inode(file)->i_atime = current_time(file_inode(file));
4596        if (timespec64_compare(&(file_inode(file)->i_atime),
4597                               &(file_inode(file)->i_mtime)) < 0)
4598                file_inode(file)->i_atime = file_inode(file)->i_mtime;
4600
4601        if (PAGE_SIZE > rc)
4602                memset(read_data + rc, 0, PAGE_SIZE - rc);
4603
4604        flush_dcache_page(page);
4605        SetPageUptodate(page);
4606
4607        /* send this page to the cache */
4608        cifs_readpage_to_fscache(file_inode(file), page);
4609
4610        rc = 0;
4611
4612io_error:
4613        kunmap(page);
4614        unlock_page(page);
4615
4616read_complete:
4617        return rc;
4618}
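
/*
 * Editor's sketch (illustration only): timespec64_compare() orders two
 * timestamps like memcmp(), so the atime update above only rewinds
 * atime when it is strictly older than mtime.  The same "atime floor"
 * rule in plain userspace C:
 */
#if 0
#include <time.h>

static struct timespec atime_floor(struct timespec atime, struct timespec mtime)
{
        if (atime.tv_sec < mtime.tv_sec ||
            (atime.tv_sec == mtime.tv_sec && atime.tv_nsec < mtime.tv_nsec))
                return mtime;           /* atime must not be less than mtime */
        return atime;
}
#endif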
4619
4620static int cifs_readpage(struct file *file, struct page *page)
4621{
4622        loff_t offset = page_file_offset(page);
4623        int rc = -EACCES;
4624        unsigned int xid;
4625
4626        xid = get_xid();
4627
4628        if (file->private_data == NULL) {
4629                rc = -EBADF;
4630                free_xid(xid);
4631                return rc;
4632        }
4633
4634        cifs_dbg(FYI, "readpage %p at offset %lld 0x%llx\n",
4635                 page, (long long)offset, (long long)offset);
4636
4637        rc = cifs_readpage_worker(file, page, &offset);
4638
4639        free_xid(xid);
4640        return rc;
4641}
4642
4643static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4644{
4645        struct cifsFileInfo *open_file;
4646
4647        spin_lock(&cifs_inode->open_file_lock);
4648        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4649                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4650                        spin_unlock(&cifs_inode->open_file_lock);
4651                        return 1;
4652                }
4653        }
4654        spin_unlock(&cifs_inode->open_file_lock);
4655        return 0;
4656}
4657
4658/* We do not want to update the file size from the server for inodes
4659   open for write, to avoid races with writepage extending the file.
4660   In the future we could consider allowing a refresh of the inode
4661   only on increases in the file size, but this is tricky to do
4662   without racing with writebehind page caching in the current
4663   Linux kernel design. */
4664bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4665{
4666        if (!cifsInode)
4667                return true;
4668
4669        if (is_inode_writable(cifsInode)) {
4670                /* This inode is open for write at least once */
4671                struct cifs_sb_info *cifs_sb;
4672
4673                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4674                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4675                        /* since there is no page cache to corrupt on
4676                           directio, we can change size safely */
4677                        return true;
4678                }
4679
4680                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4681                        return true;
4682
4683                return false;
4684        } else
4685                return true;
4686}
4687
4688static int cifs_write_begin(struct file *file, struct address_space *mapping,
4689                        loff_t pos, unsigned len, unsigned flags,
4690                        struct page **pagep, void **fsdata)
4691{
4692        int oncethru = 0;
4693        pgoff_t index = pos >> PAGE_SHIFT;
4694        loff_t offset = pos & (PAGE_SIZE - 1);
4695        loff_t page_start = pos & PAGE_MASK;
4696        loff_t i_size;
4697        struct page *page;
4698        int rc = 0;
4699
4700        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4701
4702start:
4703        page = grab_cache_page_write_begin(mapping, index, flags);
4704        if (!page) {
4705                rc = -ENOMEM;
4706                goto out;
4707        }
4708
4709        if (PageUptodate(page))
4710                goto out;
4711
4712        /*
4713         * If we write a full page it will be up to date, no need to read from
4714         * the server. If the write is short, we'll end up doing a sync write
4715         * instead.
4716         */
4717        if (len == PAGE_SIZE)
4718                goto out;
4719
4720        /*
4721         * optimize away the read when we have an oplock, and we're not
4722         * expecting to use any of the data we'd be reading in. That
4723         * is, when the page lies beyond the EOF, or straddles the EOF
4724         * and the write will cover all of the existing data.
4725         */
4726        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4727                i_size = i_size_read(mapping->host);
4728                if (page_start >= i_size ||
4729                    (offset == 0 && (pos + len) >= i_size)) {
4730                        zero_user_segments(page, 0, offset,
4731                                           offset + len,
4732                                           PAGE_SIZE);
4733                        /*
4734                         * PageChecked means that the parts of the page
4735                         * to which we're not writing are considered up
4736                         * to date. Once the data is copied to the
4737                         * page, it can be set uptodate.
4738                         */
4739                        SetPageChecked(page);
4740                        goto out;
4741                }
4742        }
4743
4744        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4745                /*
4746                 * might as well read a page, it is fast enough. If we get
4747                 * an error, we don't need to return it. cifs_write_end will
4748                 * do a sync write instead since PG_uptodate isn't set.
4749                 */
4750                cifs_readpage_worker(file, page, &page_start);
4751                put_page(page);
4752                oncethru = 1;
4753                goto start;
4754        } else {
4755                /* We could try using another file handle if there is one,
4756                   but how would we lock it to prevent a close of that
4757                   handle racing with this read? In any case, the page
4758                   will be written out by write_end, so this is fine. */
4759        }
4760out:
4761        *pagep = page;
4762        return rc;
4763}
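
/*
 * Editor's sketch (illustration only): a short, unaligned write like the
 * one below is what drives the read-modify-write handling in
 * cifs_write_begin(); a page-aligned, full-page write can skip reading
 * from the server entirely.  The path is an assumption for the example.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/cifs/test.bin", O_RDWR);

        if (fd < 0)
                return 1;
        /*
         * 5 bytes at offset 100: the page is only partially overwritten,
         * so the rest of it may have to be read in (or zeroed) first.
         */
        if (pwrite(fd, "hello", 5, 100) != 5)
                return 1;
        close(fd);
        return 0;
}
#endif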
4764
4765static int cifs_release_page(struct page *page, gfp_t gfp)
4766{
4767        if (PagePrivate(page))
4768                return 0;
4769
4770        return cifs_fscache_release_page(page, gfp);
4771}
4772
4773static void cifs_invalidate_page(struct page *page, unsigned int offset,
4774                                 unsigned int length)
4775{
4776        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4777
4778        if (offset == 0 && length == PAGE_SIZE)
4779                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4780}
4781
4782static int cifs_launder_page(struct page *page)
4783{
4784        int rc = 0;
4785        loff_t range_start = page_offset(page);
4786        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4787        struct writeback_control wbc = {
4788                .sync_mode = WB_SYNC_ALL,
4789                .nr_to_write = 0,
4790                .range_start = range_start,
4791                .range_end = range_end,
4792        };
4793
4794        cifs_dbg(FYI, "Launder page: %p\n", page);
4795
4796        if (clear_page_dirty_for_io(page))
4797                rc = cifs_writepage_locked(page, &wbc);
4798
4799        cifs_fscache_invalidate_page(page, page->mapping->host);
4800        return rc;
4801}
4802
4803void cifs_oplock_break(struct work_struct *work)
4804{
4805        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4806                                                  oplock_break);
4807        struct inode *inode = d_inode(cfile->dentry);
4808        struct cifsInodeInfo *cinode = CIFS_I(inode);
4809        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4810        struct TCP_Server_Info *server = tcon->ses->server;
4811        int rc = 0;
4812        bool purge_cache = false;
4813        bool is_deferred = false;
4814        struct cifs_deferred_close *dclose;
4815
4816        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4817                        TASK_UNINTERRUPTIBLE);
4818
4819        server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4820                                      cfile->oplock_epoch, &purge_cache);
4821
4822        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4823                                                cifs_has_mand_locks(cinode)) {
4824                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4825                         inode);
4826                cinode->oplock = 0;
4827        }
4828
4829        if (inode && S_ISREG(inode->i_mode)) {
4830                if (CIFS_CACHE_READ(cinode))
4831                        break_lease(inode, O_RDONLY);
4832                else
4833                        break_lease(inode, O_WRONLY);
4834                rc = filemap_fdatawrite(inode->i_mapping);
4835                if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4836                        rc = filemap_fdatawait(inode->i_mapping);
4837                        mapping_set_error(inode->i_mapping, rc);
4838                        cifs_zap_mapping(inode);
4839                }
4840                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4841                if (CIFS_CACHE_WRITE(cinode))
4842                        goto oplock_break_ack;
4843        }
4844
4845        rc = cifs_push_locks(cfile);
4846        if (rc)
4847                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4848
4849oplock_break_ack:
4850        /*
4851         * When an oplock break is received and there are no active
4852         * file handles, only cached ones, schedule the deferred close
4853         * immediately so that a new open will not use the cached handle.
4854         */
4855        spin_lock(&CIFS_I(inode)->deferred_lock);
4856        is_deferred = cifs_is_deferred_close(cfile, &dclose);
4857        spin_unlock(&CIFS_I(inode)->deferred_lock);
4858        if (is_deferred &&
4859            cfile->deferred_close_scheduled &&
4860            delayed_work_pending(&cfile->deferred)) {
4861                if (cancel_delayed_work(&cfile->deferred)) {
4862                        _cifsFileInfo_put(cfile, false, false);
4863                        goto oplock_break_done;
4864                }
4865        }
4866        /*
4867         * Releasing a stale oplock after a recent reconnect of the SMB
4868         * session, using a now-incorrect file handle, is not a data
4869         * integrity issue, but don't bother sending an oplock release
4870         * while the session is still disconnected: the server has already released it.
4871         */
4872        if (!cfile->oplock_break_cancelled) {
4873                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4874                                                             cinode);
4875                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4876        }
4877oplock_break_done:
4878        _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4879        cifs_done_oplock_break(cinode);
4880}
4881
4882/*
4883 * The presence of cifs_direct_io() in the address space ops vector
4884 * allows open() with O_DIRECT, which would have failed otherwise.
4885 *
4886 * In the non-cached mode (mount with cache=none), we shunt off direct
4887 * read and write requests, so this method should never be called.
4888 *
4889 * Direct I/O is not yet supported in the cached mode.
4890 */
4891static ssize_t
4892cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4893{
4894        /*
4895         * FIXME
4896         * Eventually need to support direct IO for non forcedirectio mounts
4897         */
4898        return -EINVAL;
4899}
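
/*
 * Editor's sketch (illustration only): because .direct_IO is present in
 * the aops, an open() like the one below succeeds; on a cache=none
 * mount the read/write paths bypass this method, and in cached mode the
 * -EINVAL above is what a direct I/O request would hit.  The path is an
 * assumption for the example.
 */
#if 0
#define _GNU_SOURCE                     /* for O_DIRECT */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/cifs/test.bin", O_RDWR | O_DIRECT);

        if (fd < 0)
                return 1;               /* would fail without .direct_IO */
        close(fd);
        return 0;
}
#endif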
4900
4901static int cifs_swap_activate(struct swap_info_struct *sis,
4902                              struct file *swap_file, sector_t *span)
4903{
4904        struct cifsFileInfo *cfile = swap_file->private_data;
4905        struct inode *inode = swap_file->f_mapping->host;
4906        unsigned long blocks;
4907        long long isize;
4908
4909        cifs_dbg(FYI, "swap activate\n");
4910
4911        spin_lock(&inode->i_lock);
4912        blocks = inode->i_blocks;
4913        isize = inode->i_size;
4914        spin_unlock(&inode->i_lock);
4915        if (blocks*512 < isize) {
4916                pr_warn("swap activate: swapfile has holes\n");
4917                return -EINVAL;
4918        }
4919        *span = sis->pages;
4920
4921        pr_warn_once("Swap support over SMB3 is experimental\n");
4922
4923        /*
4924         * TODO: consider adding ACL (or documenting how) to prevent other
4925         * users (on this or other systems) from reading it
4926         */
4927
4928
4929        /* TODO: add sk_set_memalloc(inet) or similar */
4930
4931        if (cfile)
4932                cfile->swapfile = true;
4933        /*
4934         * TODO: Since file already open, we can't open with DENY_ALL here
4935         * but we could add call to grab a byte range lock to prevent others
4936         * from reading or writing the file
4937         */
4938
4939        return 0;
4940}
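
/*
 * Editor's sketch (illustration only): the holes check above relies on
 * i_blocks counting 512-byte sectors; if a file has fewer allocated
 * bytes (blocks * 512) than its size, some ranges are unallocated and
 * the file is unsafe to swap on.
 */
#if 0
#include <stdbool.h>

static bool file_has_holes(unsigned long blocks, long long isize)
{
        return (long long)blocks * 512 < isize; /* i_blocks is in sectors */
}
#endif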
4941
4942static void cifs_swap_deactivate(struct file *file)
4943{
4944        struct cifsFileInfo *cfile = file->private_data;
4945
4946        cifs_dbg(FYI, "swap deactivate\n");
4947
4948        /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4949
4950        if (cfile)
4951                cfile->swapfile = false;
4952
4953        /* do we need to unpin (or unlock) the file */
4954}
4955
4956const struct address_space_operations cifs_addr_ops = {
4957        .readpage = cifs_readpage,
4958        .readpages = cifs_readpages,
4959        .writepage = cifs_writepage,
4960        .writepages = cifs_writepages,
4961        .write_begin = cifs_write_begin,
4962        .write_end = cifs_write_end,
4963        .set_page_dirty = __set_page_dirty_nobuffers,
4964        .releasepage = cifs_release_page,
4965        .direct_IO = cifs_direct_io,
4966        .invalidatepage = cifs_invalidate_page,
4967        .launder_page = cifs_launder_page,
4968        /*
4969         * TODO: investigate and, if useful, add a cifs_migratePage
4970         * helper (under CONFIG_MIGRATION) in the future, and also
4971         * investigate and add an is_dirty_writeback helper if needed
4972         */
4973        .swap_activate = cifs_swap_activate,
4974        .swap_deactivate = cifs_swap_deactivate,
4975};
4976
4977/*
4978 * cifs_readpages requires the server to support a buffer large enough to
4979 * contain the header plus one complete page of data.  Otherwise, we need
4980 * to leave cifs_readpages out of the address space operations.
4981 */
4982const struct address_space_operations cifs_addr_ops_smallbuf = {
4983        .readpage = cifs_readpage,
4984        .writepage = cifs_writepage,
4985        .writepages = cifs_writepages,
4986        .write_begin = cifs_write_begin,
4987        .write_end = cifs_write_end,
4988        .set_page_dirty = __set_page_dirty_nobuffers,
4989        .releasepage = cifs_release_page,
4990        .invalidatepage = cifs_invalidate_page,
4991        .launder_page = cifs_launder_page,
4992};
4993