linux/fs/cifs/file.c
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

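/*
 * Map the VFS O_ACCMODE open flags to the NT access bits requested from the
 * server. For example, an open(2) with O_RDWR yields
 * GENERIC_READ | GENERIC_WRITE rather than GENERIC_ALL (see the comment in
 * the function body for why).
 */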
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

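/*
 * Map the create-related VFS open flags to an NT create disposition. For
 * example, O_CREAT | O_TRUNC maps to FILE_OVERWRITE_IF (create the file if
 * it does not exist, truncate it if it does), while an open with none of
 * these flags maps to a plain FILE_OPEN.
 */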
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

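/*
 * Open a file using the POSIX extensions to the CIFS Unix protocol. On
 * success the caller gets back the oplock level in @poplock, the file
 * handle in @pnetfid and, if @pinode was supplied, an inode initialized
 * from the FILE_UNIX_BASIC_INFO returned by the server.
 */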
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

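/*
 * Open a file the traditional (non-POSIX) way: convert the VFS open flags
 * to an NT desired access and create disposition, issue the open through
 * server->ops->open(), and then refresh the inode from the returned
 * metadata (or via the Unix extensions when available).
 */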
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no POSIX flag combination that maps directly to
 *      the disposition FILE_SUPERSEDE (ie create whether or not the file
 *      exists). O_CREAT | O_TRUNC is similar, but it truncates the
 *      existing file rather than creating a new one as FILE_SUPERSEDE
 *      does (which uses the attributes / metadata passed in on the open
 *      call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

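/*
 * Allocate and initialize the per-open cifsFileInfo structure, link it into
 * the tcon and inode open-file lists, and hand the server-returned oplock
 * (or the one recorded in the pending open) to server->ops->set_fid().
 * The returned structure holds a reference on the dentry and the tlink.
 */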
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);

        /* if readable file instance put first in list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

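/*
 * VFS ->open() entry point. Tries a POSIX open first when the server
 * supports the Unix extensions, falls back to cifs_nt_open() otherwise,
 * and attaches the resulting cifsFileInfo to file->private_data.
 */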
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

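/*
 * Reopen a file handle that was invalidated, typically after the session
 * to the server was lost and reconnected. If @can_flush is set, write out
 * dirty pages and refresh the inode before using the new handle; byte
 * range locks are then reacquired via cifs_relock_file().
 */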
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down we end up here. We can never
         * tell whether the caller already holds the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through and retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write behind data that
         * needs to be flushed and the server version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we can not go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

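/*
 * Walk the list of files open on this tree connection and reopen any
 * invalidated persistent handles. References are taken under
 * tcon->open_file_lock and the actual reopen happens outside the lock,
 * as cifs_reopen_file() may block on the network.
 */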
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

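/*
 * Allocate a byte-range lock record for the given range, owned by the
 * current thread group. The caller is responsible for adding it to the
 * appropriate list.
 */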
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

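/*
 * Push cached byte-range locks out to the server as LOCKING_ANDX requests.
 * Locks are batched: as many LOCKING_ANDX_RANGE entries as fit in the
 * negotiated buffer, i.e. roughly
 * (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE) ranges
 * per request, with one pass for exclusive and one for shared locks.
 */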
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

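/*
 * Push cached POSIX byte-range locks to the server. The locks are counted
 * under flc_lock, preallocated outside it (kmalloc may sleep), then filled
 * in under the lock again and finally sent via CIFSSMBPosixLock().
 */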
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem, which
         * protects the locking operations on this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated structure */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

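/*
 * Decode a struct file_lock into the lock type and lock/unlock/wait flags
 * used by the rest of the locking code, logging any flags or lock types
 * this client does not implement yet.
 */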
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

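/*
 * Handle an F_GETLK-style request. For mandatory locks the conflict test
 * is performed against the server by briefly acquiring and releasing the
 * range: if the lock attempt succeeds the range is reported unlocked,
 * otherwise the flock type is set to the conflicting lock type.
 */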
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

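/*
 * Unlock a byte range on the server, batching LOCKING_ANDX_RANGE entries
 * the same way cifs_push_mandatory_locks() does. Unlocked records are
 * moved to a temporary list first so they can be restored to the file's
 * list if the unlock request fails on the server.
 */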
1441int
1442cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1443                  unsigned int xid)
1444{
1445        int rc = 0, stored_rc;
1446        static const int types[] = {
1447                LOCKING_ANDX_LARGE_FILES,
1448                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1449        };
1450        unsigned int i;
1451        unsigned int max_num, num, max_buf;
1452        LOCKING_ANDX_RANGE *buf, *cur;
1453        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1454        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1455        struct cifsLockInfo *li, *tmp;
1456        __u64 length = 1 + flock->fl_end - flock->fl_start;
1457        struct list_head tmp_llist;
1458
1459        INIT_LIST_HEAD(&tmp_llist);
1460
1461        /*
1462         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1463         * and check it for zero before using.
1464         */
1465        max_buf = tcon->ses->server->maxBuf;
1466        if (!max_buf)
1467                return -EINVAL;
1468
1469        max_num = (max_buf - sizeof(struct smb_hdr)) /
1470                                                sizeof(LOCKING_ANDX_RANGE);
1471        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1472        if (!buf)
1473                return -ENOMEM;
1474
1475        down_write(&cinode->lock_sem);
1476        for (i = 0; i < 2; i++) {
1477                cur = buf;
1478                num = 0;
1479                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1480                        if (flock->fl_start > li->offset ||
1481                            (flock->fl_start + length) <
1482                            (li->offset + li->length))
1483                                continue;
1484                        if (current->tgid != li->pid)
1485                                continue;
1486                        if (types[i] != li->type)
1487                                continue;
1488                        if (cinode->can_cache_brlcks) {
1489                                /*
1490                                 * We can cache brlock requests - simply remove
1491                                 * a lock from the file's list.
1492                                 */
1493                                list_del(&li->llist);
1494                                cifs_del_lock_waiters(li);
1495                                kfree(li);
1496                                continue;
1497                        }
1498                        cur->Pid = cpu_to_le16(li->pid);
1499                        cur->LengthLow = cpu_to_le32((u32)li->length);
1500                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1501                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1502                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1503                        /*
1504                         * We need to save the lock here so we can add it back to
1505                         * the file's list if the unlock range request fails on
1506                         * the server.
1507                         */
1508                        list_move(&li->llist, &tmp_llist);
1509                        if (++num == max_num) {
1510                                stored_rc = cifs_lockv(xid, tcon,
1511                                                       cfile->fid.netfid,
1512                                                       li->type, num, 0, buf);
1513                                if (stored_rc) {
1514                                        /*
1515                                         * We failed on the unlock range
1516                                         * request - add all locks from the tmp
1517                                         * list to the head of the file's list.
1518                                         */
1519                                        cifs_move_llist(&tmp_llist,
1520                                                        &cfile->llist->locks);
1521                                        rc = stored_rc;
1522                                } else
1523                                        /*
1524                                         * The unlock range request succeeded -
1525                                         * free the tmp list.
1526                                         */
1527                                        cifs_free_llist(&tmp_llist);
1528                                cur = buf;
1529                                num = 0;
1530                        } else
1531                                cur++;
1532                }
1533                if (num) {
1534                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1535                                               types[i], num, 0, buf);
1536                        if (stored_rc) {
1537                                cifs_move_llist(&tmp_llist,
1538                                                &cfile->llist->locks);
1539                                rc = stored_rc;
1540                        } else
1541                                cifs_free_llist(&tmp_llist);
1542                }
1543        }
1544
1545        up_write(&cinode->lock_sem);
1546        kfree(buf);
1547        return rc;
1548}
1549
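/*
 * Set or clear a byte-range lock. For POSIX-capable servers this is a
 * single CIFSSMBPosixLock call; for mandatory ones the lock is first
 * added to the local list (checking for conflicts) and only then sent
 * to the server, so a failed server call can be rolled back cleanly.
 */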
1550static int
1551cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1552           bool wait_flag, bool posix_lck, int lock, int unlock,
1553           unsigned int xid)
1554{
1555        int rc = 0;
1556        __u64 length = 1 + flock->fl_end - flock->fl_start;
1557        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1558        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1559        struct TCP_Server_Info *server = tcon->ses->server;
1560        struct inode *inode = d_inode(cfile->dentry);
1561
1562        if (posix_lck) {
1563                int posix_lock_type;
1564
1565                rc = cifs_posix_lock_set(file, flock);
1566                if (rc <= 0)
1567                        return rc;
1568
1569                if (type & server->vals->shared_lock_type)
1570                        posix_lock_type = CIFS_RDLCK;
1571                else
1572                        posix_lock_type = CIFS_WRLCK;
1573
1574                if (unlock == 1)
1575                        posix_lock_type = CIFS_UNLCK;
1576
1577                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1578                                      hash_lockowner(flock->fl_owner),
1579                                      flock->fl_start, length,
1580                                      NULL, posix_lock_type, wait_flag);
1581                goto out;
1582        }
1583
1584        if (lock) {
1585                struct cifsLockInfo *lock;
1586
1587                lock = cifs_lock_init(flock->fl_start, length, type);
1588                if (!lock)
1589                        return -ENOMEM;
1590
1591                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1592                if (rc < 0) {
1593                        kfree(lock);
1594                        return rc;
1595                }
1596                if (!rc)
1597                        goto out;
1598
1599                /*
1600                 * A Windows 7 server can delay breaking a lease from read to None
1601                 * if we set a byte-range lock on a file - break it explicitly
1602                 * before sending the lock to the server to be sure the next
1603                 * read won't conflict with non-overlapping locks due to
1604                 * page reading.
1605                 */
1606                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1607                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1608                        cifs_zap_mapping(inode);
1609                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1610                                 inode);
1611                        CIFS_I(inode)->oplock = 0;
1612                }
1613
1614                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1615                                            type, 1, 0, wait_flag);
1616                if (rc) {
1617                        kfree(lock);
1618                        return rc;
1619                }
1620
1621                cifs_lock_add(cfile, lock);
1622        } else if (unlock)
1623                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1624
1625out:
1626        if (flock->fl_flags & FL_POSIX && !rc)
1627                rc = locks_lock_file_wait(file, flock);
1628        return rc;
1629}
1630
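/*
 * Entry point for the ->lock() file operation, reached for fcntl()
 * byte-range lock requests. Decides between the POSIX and mandatory
 * locking paths and dispatches to cifs_getlk() or cifs_setlk().
 */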
1631int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1632{
1633        int rc, xid;
1634        int lock = 0, unlock = 0;
1635        bool wait_flag = false;
1636        bool posix_lck = false;
1637        struct cifs_sb_info *cifs_sb;
1638        struct cifs_tcon *tcon;
1639        struct cifsInodeInfo *cinode;
1640        struct cifsFileInfo *cfile;
1641        __u16 netfid;
1642        __u32 type;
1643
1644        rc = -EACCES;
1645        xid = get_xid();
1646
1647        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1648                 cmd, flock->fl_flags, flock->fl_type,
1649                 flock->fl_start, flock->fl_end);
1650
1651        cfile = (struct cifsFileInfo *)file->private_data;
1652        tcon = tlink_tcon(cfile->tlink);
1653
1654        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1655                        tcon->ses->server);
1656
1657        cifs_sb = CIFS_FILE_SB(file);
1658        netfid = cfile->fid.netfid;
1659        cinode = CIFS_I(file_inode(file));
1660
1661        if (cap_unix(tcon->ses) &&
1662            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1663            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1664                posix_lck = true;
1665        /*
1666         * BB add code here to normalize offset and length to account for
1667         * negative length which we cannot accept over the wire.
1668         */
1669        if (IS_GETLK(cmd)) {
1670                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1671                free_xid(xid);
1672                return rc;
1673        }
1674
1675        if (!lock && !unlock) {
1676                /*
1677                 * if neither a lock nor an unlock was requested there is
1678                 * nothing to do since we do not know what the caller wants
1679                 */
1680                free_xid(xid);
1681                return -EOPNOTSUPP;
1682        }
1683
1684        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1685                        xid);
1686        free_xid(xid);
1687        return rc;
1688}
1689
1690/*
1691 * update the file size (if needed) after a write. Should be called with
1692 * the inode->i_lock held
1693 */
1694void
1695cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1696                      unsigned int bytes_written)
1697{
1698        loff_t end_of_write = offset + bytes_written;
1699
1700        if (end_of_write > cifsi->server_eof)
1701                cifsi->server_eof = end_of_write;
1702}
1703
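/*
 * Synchronous write helper. Loops until write_size bytes have been sent,
 * retrying on -EAGAIN (reopening the handle after a reconnect if needed),
 * capping each request at the server's wp_retry_size, and updating the
 * cached EOF and inode size as data goes out.
 */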
1704static ssize_t
1705cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1706           size_t write_size, loff_t *offset)
1707{
1708        int rc = 0;
1709        unsigned int bytes_written = 0;
1710        unsigned int total_written;
1711        struct cifs_sb_info *cifs_sb;
1712        struct cifs_tcon *tcon;
1713        struct TCP_Server_Info *server;
1714        unsigned int xid;
1715        struct dentry *dentry = open_file->dentry;
1716        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1717        struct cifs_io_parms io_parms;
1718
1719        cifs_sb = CIFS_SB(dentry->d_sb);
1720
1721        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1722                 write_size, *offset, dentry);
1723
1724        tcon = tlink_tcon(open_file->tlink);
1725        server = tcon->ses->server;
1726
1727        if (!server->ops->sync_write)
1728                return -ENOSYS;
1729
1730        xid = get_xid();
1731
1732        for (total_written = 0; write_size > total_written;
1733             total_written += bytes_written) {
1734                rc = -EAGAIN;
1735                while (rc == -EAGAIN) {
1736                        struct kvec iov[2];
1737                        unsigned int len;
1738
1739                        if (open_file->invalidHandle) {
1740                                /* we could deadlock if we called
1741                                   filemap_fdatawait from here so tell
1742                                   reopen_file not to flush data to
1743                                   server now */
1744                                rc = cifs_reopen_file(open_file, false);
1745                                if (rc != 0)
1746                                        break;
1747                        }
1748
1749                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
1750                                  (unsigned int)write_size - total_written);
1751                        /* iov[0] is reserved for smb header */
1752                        iov[1].iov_base = (char *)write_data + total_written;
1753                        iov[1].iov_len = len;
1754                        io_parms.pid = pid;
1755                        io_parms.tcon = tcon;
1756                        io_parms.offset = *offset;
1757                        io_parms.length = len;
1758                        rc = server->ops->sync_write(xid, &open_file->fid,
1759                                        &io_parms, &bytes_written, iov, 1);
1760                }
1761                if (rc || (bytes_written == 0)) {
1762                        if (total_written)
1763                                break;
1764                        else {
1765                                free_xid(xid);
1766                                return rc;
1767                        }
1768                } else {
1769                        spin_lock(&d_inode(dentry)->i_lock);
1770                        cifs_update_eof(cifsi, *offset, bytes_written);
1771                        spin_unlock(&d_inode(dentry)->i_lock);
1772                        *offset += bytes_written;
1773                }
1774        }
1775
1776        cifs_stats_bytes_written(tcon, total_written);
1777
1778        if (total_written > 0) {
1779                spin_lock(&d_inode(dentry)->i_lock);
1780                if (*offset > d_inode(dentry)->i_size)
1781                        i_size_write(d_inode(dentry), *offset);
1782                spin_unlock(&d_inode(dentry)->i_lock);
1783        }
1784        mark_inode_dirty_sync(d_inode(dentry));
1785        free_xid(xid);
1786        return total_written;
1787}
1788
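/*
 * Find an open handle on this inode that is usable for reading,
 * optionally restricted to the current fsuid on multiuser mounts.
 * Returns the handle with an extra reference held, or NULL.
 */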
1789struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1790                                        bool fsuid_only)
1791{
1792        struct cifsFileInfo *open_file = NULL;
1793        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1794        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1795
1796        /* only filter by fsuid on multiuser mounts */
1797        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1798                fsuid_only = false;
1799
1800        spin_lock(&tcon->open_file_lock);
1801        /* we could simply take the first list entry since write-only entries
1802           are always at the end of the list, but the first entry might
1803           have a close pending, so we go through the whole list */
1804        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1805                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1806                        continue;
1807                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1808                        if (!open_file->invalidHandle) {
1809                                /* found a good file */
1810                                /* lock it so it will not be closed on us */
1811                                cifsFileInfo_get(open_file);
1812                                spin_unlock(&tcon->open_file_lock);
1813                                return open_file;
1814                        } /* else might as well continue, and look for
1815                             another, or simply have the caller reopen it
1816                             again rather than trying to fix this handle */
1817                } else /* write only file */
1818                        break; /* write only files are last so must be done */
1819        }
1820        spin_unlock(&tcon->open_file_lock);
1821        return NULL;
1822}
1823
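/*
 * Find an open handle usable for writing, preferring handles owned by
 * the current task. Invalid (disconnected) handles are remembered and
 * reopened as a last resort, retrying up to MAX_REOPEN_ATT times.
 * Returns the handle with an extra reference held, or NULL.
 */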
1824struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1825                                        bool fsuid_only)
1826{
1827        struct cifsFileInfo *open_file, *inv_file = NULL;
1828        struct cifs_sb_info *cifs_sb;
1829        struct cifs_tcon *tcon;
1830        bool any_available = false;
1831        int rc;
1832        unsigned int refind = 0;
1833
1834        /* Having a null inode here (because mapping->host was set to zero by
1835        the VFS or MM) should not happen, but we had reports of an oops (due to
1836        it being zero) during stress test cases, so we need to check for it */
1837
1838        if (cifs_inode == NULL) {
1839                cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1840                dump_stack();
1841                return NULL;
1842        }
1843
1844        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1845        tcon = cifs_sb_master_tcon(cifs_sb);
1846
1847        /* only filter by fsuid on multiuser mounts */
1848        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1849                fsuid_only = false;
1850
1851        spin_lock(&tcon->open_file_lock);
1852refind_writable:
1853        if (refind > MAX_REOPEN_ATT) {
1854                spin_unlock(&tcon->open_file_lock);
1855                return NULL;
1856        }
1857        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1858                if (!any_available && open_file->pid != current->tgid)
1859                        continue;
1860                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1861                        continue;
1862                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1863                        if (!open_file->invalidHandle) {
1864                                /* found a good writable file */
1865                                cifsFileInfo_get(open_file);
1866                                spin_unlock(&tcon->open_file_lock);
1867                                return open_file;
1868                        } else {
1869                                if (!inv_file)
1870                                        inv_file = open_file;
1871                        }
1872                }
1873        }
1874        /* couldn't find a usable FH with the same pid, try any available */
1875        if (!any_available) {
1876                any_available = true;
1877                goto refind_writable;
1878        }
1879
1880        if (inv_file) {
1881                any_available = false;
1882                cifsFileInfo_get(inv_file);
1883        }
1884
1885        spin_unlock(&tcon->open_file_lock);
1886
1887        if (inv_file) {
1888                rc = cifs_reopen_file(inv_file, false);
1889                if (!rc)
1890                        return inv_file;
1891                else {
1892                        spin_lock(&tcon->open_file_lock);
1893                        list_move_tail(&inv_file->flist,
1894                                        &cifs_inode->openFileList);
1895                        spin_unlock(&tcon->open_file_lock);
1896                        cifsFileInfo_put(inv_file);
1897                        ++refind;
1898                        inv_file = NULL;
1899                        spin_lock(&tcon->open_file_lock);
1900                        goto refind_writable;
1901                }
1902        }
1903
1904        return NULL;
1905}
1906
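/*
 * Write the byte range [from, to) of a cached page back to the server
 * using any available writable handle. The range is clamped so that a
 * racing truncate cannot make us extend the file.
 */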
1907static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1908{
1909        struct address_space *mapping = page->mapping;
1910        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1911        char *write_data;
1912        int rc = -EFAULT;
1913        int bytes_written = 0;
1914        struct inode *inode;
1915        struct cifsFileInfo *open_file;
1916
1917        if (!mapping || !mapping->host)
1918                return -EFAULT;
1919
1920        inode = page->mapping->host;
1921
1922        offset += (loff_t)from;
1923        write_data = kmap(page);
1924        write_data += from;
1925
1926        if ((to > PAGE_SIZE) || (from > to)) {
1927                kunmap(page);
1928                return -EIO;
1929        }
1930
1931        /* racing with truncate? */
1932        if (offset > mapping->host->i_size) {
1933                kunmap(page);
1934                return 0; /* don't care */
1935        }
1936
1937        /* check to make sure that we are not extending the file */
1938        if (mapping->host->i_size - offset < (loff_t)to)
1939                to = (unsigned)(mapping->host->i_size - offset);
1940
1941        open_file = find_writable_file(CIFS_I(mapping->host), false);
1942        if (open_file) {
1943                bytes_written = cifs_write(open_file, open_file->pid,
1944                                           write_data, to - from, &offset);
1945                cifsFileInfo_put(open_file);
1946                /* Does mm or vfs already set times? */
1947                inode->i_atime = inode->i_mtime = current_time(inode);
1948                if ((bytes_written > 0) && (offset))
1949                        rc = 0;
1950                else if (bytes_written < 0)
1951                        rc = bytes_written;
1952        } else {
1953                cifs_dbg(FYI, "No writeable filehandles for inode\n");
1954                rc = -EIO;
1955        }
1956
1957        kunmap(page);
1958        return rc;
1959}
1960
1961static struct cifs_writedata *
1962wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1963                          pgoff_t end, pgoff_t *index,
1964                          unsigned int *found_pages)
1965{
1966        struct cifs_writedata *wdata;
1967
1968        wdata = cifs_writedata_alloc((unsigned int)tofind,
1969                                     cifs_writev_complete);
1970        if (!wdata)
1971                return NULL;
1972
1973        *found_pages = find_get_pages_range_tag(mapping, index, end,
1974                                PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1975        return wdata;
1976}
1977
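/*
 * Lock and claim up to found_pages dirty pages for a single writeback
 * request. Stops at the first page that is non-consecutive, beyond the
 * requested range, already under writeback, or no longer dirty; any
 * pages not claimed are released before returning. Returns the number
 * of pages marked for writeback.
 */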
1978static unsigned int
1979wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1980                    struct address_space *mapping,
1981                    struct writeback_control *wbc,
1982                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1983{
1984        unsigned int nr_pages = 0, i;
1985        struct page *page;
1986
1987        for (i = 0; i < found_pages; i++) {
1988                page = wdata->pages[i];
1989                /*
1990                 * At this point we hold neither the i_pages lock nor the
1991                 * page lock: the page may be truncated or invalidated
1992                 * (changing page->mapping to NULL), or even swizzled
1993                 * back from swapper_space to tmpfs file mapping
1994                 */
1995
1996                if (nr_pages == 0)
1997                        lock_page(page);
1998                else if (!trylock_page(page))
1999                        break;
2000
2001                if (unlikely(page->mapping != mapping)) {
2002                        unlock_page(page);
2003                        break;
2004                }
2005
2006                if (!wbc->range_cyclic && page->index > end) {
2007                        *done = true;
2008                        unlock_page(page);
2009                        break;
2010                }
2011
2012                if (*next && (page->index != *next)) {
2013                        /* Not next consecutive page */
2014                        unlock_page(page);
2015                        break;
2016                }
2017
2018                if (wbc->sync_mode != WB_SYNC_NONE)
2019                        wait_on_page_writeback(page);
2020
2021                if (PageWriteback(page) ||
2022                                !clear_page_dirty_for_io(page)) {
2023                        unlock_page(page);
2024                        break;
2025                }
2026
2027                /*
2028                 * This actually clears the dirty bit in the radix tree.
2029                 * See cifs_writepage() for more commentary.
2030                 */
2031                set_page_writeback(page);
2032                if (page_offset(page) >= i_size_read(mapping->host)) {
2033                        *done = true;
2034                        unlock_page(page);
2035                        end_page_writeback(page);
2036                        break;
2037                }
2038
2039                wdata->pages[i] = page;
2040                *next = page->index + 1;
2041                ++nr_pages;
2042        }
2043
2044        /* reset index to refind any pages skipped */
2045        if (nr_pages == 0)
2046                *index = wdata->pages[0]->index + 1;
2047
2048        /* put any pages we aren't going to use */
2049        for (i = nr_pages; i < found_pages; i++) {
2050                put_page(wdata->pages[i]);
2051                wdata->pages[i] = NULL;
2052        }
2053
2054        return nr_pages;
2055}
2056
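/*
 * Fill in the remaining wdata fields (offset, sizes, target handle) and
 * kick off the asynchronous write. The pages stay locked until the
 * request has been handed to the transport, successful or not.
 */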
2057static int
2058wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2059                 struct address_space *mapping, struct writeback_control *wbc)
2060{
2061        int rc = 0;
2062        struct TCP_Server_Info *server;
2063        unsigned int i;
2064
2065        wdata->sync_mode = wbc->sync_mode;
2066        wdata->nr_pages = nr_pages;
2067        wdata->offset = page_offset(wdata->pages[0]);
2068        wdata->pagesz = PAGE_SIZE;
2069        wdata->tailsz = min(i_size_read(mapping->host) -
2070                        page_offset(wdata->pages[nr_pages - 1]),
2071                        (loff_t)PAGE_SIZE);
2072        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2073
2074        if (wdata->cfile != NULL)
2075                cifsFileInfo_put(wdata->cfile);
2076        wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2077        if (!wdata->cfile) {
2078                cifs_dbg(VFS, "No writable handles for inode\n");
2079                rc = -EBADF;
2080        } else {
2081                wdata->pid = wdata->cfile->pid;
2082                server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2083                rc = server->ops->async_writev(wdata, cifs_writedata_release);
2084        }
2085
2086        for (i = 0; i < nr_pages; ++i)
2087                unlock_page(wdata->pages[i]);
2088
2089        return rc;
2090}
2091
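/*
 * ->writepages() for cifs: gathers runs of contiguous dirty pages into
 * wsize-limited async write requests, respecting the server's credit
 * limits via wait_mtu_credits. Falls back to generic_writepages() (one
 * page at a time) when wsize is smaller than the page size.
 */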
2092static int cifs_writepages(struct address_space *mapping,
2093                           struct writeback_control *wbc)
2094{
2095        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2096        struct TCP_Server_Info *server;
2097        bool done = false, scanned = false, range_whole = false;
2098        pgoff_t end, index;
2099        struct cifs_writedata *wdata;
2100        int rc = 0;
2101
2102        /*
2103         * If wsize is smaller than the page cache size, default to writing
2104         * one page at a time via cifs_writepage
2105         */
2106        if (cifs_sb->wsize < PAGE_SIZE)
2107                return generic_writepages(mapping, wbc);
2108
2109        if (wbc->range_cyclic) {
2110                index = mapping->writeback_index; /* Start from prev offset */
2111                end = -1;
2112        } else {
2113                index = wbc->range_start >> PAGE_SHIFT;
2114                end = wbc->range_end >> PAGE_SHIFT;
2115                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2116                        range_whole = true;
2117                scanned = true;
2118        }
2119        server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2120retry:
2121        while (!done && index <= end) {
2122                unsigned int i, nr_pages, found_pages, wsize, credits;
2123                pgoff_t next = 0, tofind, saved_index = index;
2124
2125                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2126                                                   &wsize, &credits);
2127                if (rc)
2128                        break;
2129
2130                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2131
2132                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2133                                                  &found_pages);
2134                if (!wdata) {
2135                        rc = -ENOMEM;
2136                        add_credits_and_wake_if(server, credits, 0);
2137                        break;
2138                }
2139
2140                if (found_pages == 0) {
2141                        kref_put(&wdata->refcount, cifs_writedata_release);
2142                        add_credits_and_wake_if(server, credits, 0);
2143                        break;
2144                }
2145
2146                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2147                                               end, &index, &next, &done);
2148
2149                /* nothing to write? */
2150                if (nr_pages == 0) {
2151                        kref_put(&wdata->refcount, cifs_writedata_release);
2152                        add_credits_and_wake_if(server, credits, 0);
2153                        continue;
2154                }
2155
2156                wdata->credits = credits;
2157
2158                rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2159
2160                /* send failure -- clean up the mess */
2161                if (rc != 0) {
2162                        add_credits_and_wake_if(server, wdata->credits, 0);
2163                        for (i = 0; i < nr_pages; ++i) {
2164                                if (rc == -EAGAIN)
2165                                        redirty_page_for_writepage(wbc,
2166                                                           wdata->pages[i]);
2167                                else
2168                                        SetPageError(wdata->pages[i]);
2169                                end_page_writeback(wdata->pages[i]);
2170                                put_page(wdata->pages[i]);
2171                        }
2172                        if (rc != -EAGAIN)
2173                                mapping_set_error(mapping, rc);
2174                }
2175                kref_put(&wdata->refcount, cifs_writedata_release);
2176
2177                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2178                        index = saved_index;
2179                        continue;
2180                }
2181
2182                wbc->nr_to_write -= nr_pages;
2183                if (wbc->nr_to_write <= 0)
2184                        done = true;
2185
2186                index = next;
2187        }
2188
2189        if (!scanned && !done) {
2190                /*
2191                 * We hit the last page and there is more work to be done: wrap
2192                 * back to the start of the file
2193                 */
2194                scanned = true;
2195                index = 0;
2196                goto retry;
2197        }
2198
2199        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2200                mapping->writeback_index = index;
2201
2202        return rc;
2203}
2204
2205static int
2206cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2207{
2208        int rc;
2209        unsigned int xid;
2210
2211        xid = get_xid();
2212/* BB add check for wbc flags */
2213        get_page(page);
2214        if (!PageUptodate(page))
2215                cifs_dbg(FYI, "ppw - page not up to date\n");
2216
2217        /*
2218         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2219         *
2220         * A writepage() implementation always needs to do either this,
2221         * or re-dirty the page with "redirty_page_for_writepage()" in
2222         * the case of a failure.
2223         *
2224         * Just unlocking the page will cause the radix tree tag-bits
2225         * to fail to update with the state of the page correctly.
2226         */
2227        set_page_writeback(page);
2228retry_write:
2229        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2230        if (rc == -EAGAIN) {
2231                if (wbc->sync_mode == WB_SYNC_ALL)
2232                        goto retry_write;
2233                redirty_page_for_writepage(wbc, page);
2234        } else if (rc != 0) {
2235                SetPageError(page);
2236                mapping_set_error(page->mapping, rc);
2237        } else {
2238                SetPageUptodate(page);
2239        }
2240        end_page_writeback(page);
2241        put_page(page);
2242        free_xid(xid);
2243        return rc;
2244}
2245
2246static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2247{
2248        int rc = cifs_writepage_locked(page, wbc);
2249        unlock_page(page);
2250        return rc;
2251}
2252
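/*
 * ->write_end() for cifs: mark the page up to date and dirty when a full
 * page (or a checked partial page) was copied; otherwise push the copied
 * bytes straight to the server with cifs_write(), since a non-uptodate
 * partial page cannot simply be marked dirty.
 */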
2253static int cifs_write_end(struct file *file, struct address_space *mapping,
2254                        loff_t pos, unsigned len, unsigned copied,
2255                        struct page *page, void *fsdata)
2256{
2257        int rc;
2258        struct inode *inode = mapping->host;
2259        struct cifsFileInfo *cfile = file->private_data;
2260        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2261        __u32 pid;
2262
2263        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2264                pid = cfile->pid;
2265        else
2266                pid = current->tgid;
2267
2268        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2269                 page, pos, copied);
2270
2271        if (PageChecked(page)) {
2272                if (copied == len)
2273                        SetPageUptodate(page);
2274                ClearPageChecked(page);
2275        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2276                SetPageUptodate(page);
2277
2278        if (!PageUptodate(page)) {
2279                char *page_data;
2280                unsigned offset = pos & (PAGE_SIZE - 1);
2281                unsigned int xid;
2282
2283                xid = get_xid();
2284                /* this is probably better than calling cifs_partialpagewrite
2285                   directly, since in this function the file handle is
2286                   known and we might as well leverage it */
2287                /* BB check if anything else missing out of ppw
2288                   such as updating last write time */
2289                page_data = kmap(page);
2290                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2291                /* if (rc < 0) should we set writebehind rc? */
2292                kunmap(page);
2293
2294                free_xid(xid);
2295        } else {
2296                rc = copied;
2297                pos += copied;
2298                set_page_dirty(page);
2299        }
2300
2301        if (rc > 0) {
2302                spin_lock(&inode->i_lock);
2303                if (pos > inode->i_size)
2304                        i_size_write(inode, pos);
2305                spin_unlock(&inode->i_lock);
2306        }
2307
2308        unlock_page(page);
2309        put_page(page);
2310
2311        return rc;
2312}
2313
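/*
 * fsync for strict cache mode: in addition to flushing dirty pages and
 * asking the server to flush the handle, zap the page cache mapping if
 * we no longer hold a read lease, so stale cached data is not reused.
 */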
2314int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2315                      int datasync)
2316{
2317        unsigned int xid;
2318        int rc = 0;
2319        struct cifs_tcon *tcon;
2320        struct TCP_Server_Info *server;
2321        struct cifsFileInfo *smbfile = file->private_data;
2322        struct inode *inode = file_inode(file);
2323        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2324
2325        rc = file_write_and_wait_range(file, start, end);
2326        if (rc)
2327                return rc;
2328        inode_lock(inode);
2329
2330        xid = get_xid();
2331
2332        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2333                 file, datasync);
2334
2335        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2336                rc = cifs_zap_mapping(inode);
2337                if (rc) {
2338                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2339                        rc = 0; /* don't care about it in fsync */
2340                }
2341        }
2342
2343        tcon = tlink_tcon(smbfile->tlink);
2344        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2345                server = tcon->ses->server;
2346                if (server->ops->flush)
2347                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2348                else
2349                        rc = -ENOSYS;
2350        }
2351
2352        free_xid(xid);
2353        inode_unlock(inode);
2354        return rc;
2355}
2356
2357int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2358{
2359        unsigned int xid;
2360        int rc = 0;
2361        struct cifs_tcon *tcon;
2362        struct TCP_Server_Info *server;
2363        struct cifsFileInfo *smbfile = file->private_data;
2364        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2365        struct inode *inode = file->f_mapping->host;
2366
2367        rc = file_write_and_wait_range(file, start, end);
2368        if (rc)
2369                return rc;
2370        inode_lock(inode);
2371
2372        xid = get_xid();
2373
2374        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2375                 file, datasync);
2376
2377        tcon = tlink_tcon(smbfile->tlink);
2378        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2379                server = tcon->ses->server;
2380                if (server->ops->flush)
2381                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2382                else
2383                        rc = -ENOSYS;
2384        }
2385
2386        free_xid(xid);
2387        inode_unlock(inode);
2388        return rc;
2389}
2390
2391/*
2392 * As the file closes, flush all cached write data for this inode, checking
2393 * for write-behind errors.
2394 */
2395int cifs_flush(struct file *file, fl_owner_t id)
2396{
2397        struct inode *inode = file_inode(file);
2398        int rc = 0;
2399
2400        if (file->f_mode & FMODE_WRITE)
2401                rc = filemap_write_and_wait(inode->i_mapping);
2402
2403        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2404
2405        return rc;
2406}
2407
2408static int
2409cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2410{
2411        int rc = 0;
2412        unsigned long i;
2413
2414        for (i = 0; i < num_pages; i++) {
2415                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2416                if (!pages[i]) {
2417                        /*
2418                         * save number of pages we have already allocated and
2419                         * return with ENOMEM error
2420                         */
2421                        num_pages = i;
2422                        rc = -ENOMEM;
2423                        break;
2424                }
2425        }
2426
2427        if (rc) {
2428                for (i = 0; i < num_pages; i++)
2429                        put_page(pages[i]);
2430        }
2431        return rc;
2432}
2433
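/*
 * Work out how many pages a single uncached write request will span.
 * The request length is capped at wsize and the result rounded up to
 * whole pages: e.g. with wsize = 65536, len = 200000 and 4K pages,
 * *cur_len becomes 65536 and the return value is 16.
 */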
2434static inline
2435size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2436{
2437        size_t num_pages;
2438        size_t clen;
2439
2440        clen = min_t(const size_t, len, wsize);
2441        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2442
2443        if (cur_len)
2444                *cur_len = clen;
2445
2446        return num_pages;
2447}
2448
2449static void
2450cifs_uncached_writedata_release(struct kref *refcount)
2451{
2452        int i;
2453        struct cifs_writedata *wdata = container_of(refcount,
2454                                        struct cifs_writedata, refcount);
2455
2456        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2457        for (i = 0; i < wdata->nr_pages; i++)
2458                put_page(wdata->pages[i]);
2459        cifs_writedata_release(refcount);
2460}
2461
2462static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2463
2464static void
2465cifs_uncached_writev_complete(struct work_struct *work)
2466{
2467        struct cifs_writedata *wdata = container_of(work,
2468                                        struct cifs_writedata, work);
2469        struct inode *inode = d_inode(wdata->cfile->dentry);
2470        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2471
2472        spin_lock(&inode->i_lock);
2473        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2474        if (cifsi->server_eof > inode->i_size)
2475                i_size_write(inode, cifsi->server_eof);
2476        spin_unlock(&inode->i_lock);
2477
2478        complete(&wdata->done);
2479        collect_uncached_write_data(wdata->ctx);
2480        /* the below call can possibly free the last ref to aio ctx */
2481        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2482}
2483
2484static int
2485wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2486                      size_t *len, unsigned long *num_pages)
2487{
2488        size_t save_len, copied, bytes, cur_len = *len;
2489        unsigned long i, nr_pages = *num_pages;
2490
2491        save_len = cur_len;
2492        for (i = 0; i < nr_pages; i++) {
2493                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2494                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2495                cur_len -= copied;
2496                /*
2497                 * If we didn't copy as much as we expected, then that
2498                 * may mean we trod into an unmapped area. Stop copying
2499                 * at that point. On the next pass through the big
2500                 * loop, we'll likely end up getting a zero-length
2501                 * write and bailing out of it.
2502                 */
2503                if (copied < bytes)
2504                        break;
2505        }
2506        cur_len = save_len - cur_len;
2507        *len = cur_len;
2508
2509        /*
2510         * If we have no data to send, then that probably means that
2511         * the copy above failed altogether. That's most likely because
2512         * the address in the iovec was bogus. Return -EFAULT and let
2513         * the caller free anything we allocated and bail out.
2514         */
2515        if (!cur_len)
2516                return -EFAULT;
2517
2518        /*
2519         * i + 1 now represents the number of pages we actually used in
2520         * the copy phase above.
2521         */
2522        *num_pages = i + 1;
2523        return 0;
2524}
2525
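/*
 * Core of the uncached write path: carve the iterator up into wsize-
 * sized chunks, copy each chunk into freshly allocated pages, and issue
 * an async write per chunk, queueing the wdata on wdata_list so that
 * collect_uncached_write_data() can reap the results. On -EAGAIN the
 * iterator is rewound and the chunk resent.
 */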
2526static int
2527cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2528                     struct cifsFileInfo *open_file,
2529                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2530                     struct cifs_aio_ctx *ctx)
2531{
2532        int rc = 0;
2533        size_t cur_len;
2534        unsigned long nr_pages, num_pages, i;
2535        struct cifs_writedata *wdata;
2536        struct iov_iter saved_from = *from;
2537        loff_t saved_offset = offset;
2538        pid_t pid;
2539        struct TCP_Server_Info *server;
2540
2541        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2542                pid = open_file->pid;
2543        else
2544                pid = current->tgid;
2545
2546        server = tlink_tcon(open_file->tlink)->ses->server;
2547
2548        do {
2549                unsigned int wsize, credits;
2550
2551                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2552                                                   &wsize, &credits);
2553                if (rc)
2554                        break;
2555
2556                nr_pages = get_numpages(wsize, len, &cur_len);
2557                wdata = cifs_writedata_alloc(nr_pages,
2558                                             cifs_uncached_writev_complete);
2559                if (!wdata) {
2560                        rc = -ENOMEM;
2561                        add_credits_and_wake_if(server, credits, 0);
2562                        break;
2563                }
2564
2565                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2566                if (rc) {
2567                        kfree(wdata);
2568                        add_credits_and_wake_if(server, credits, 0);
2569                        break;
2570                }
2571
2572                num_pages = nr_pages;
2573                rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2574                if (rc) {
2575                        for (i = 0; i < nr_pages; i++)
2576                                put_page(wdata->pages[i]);
2577                        kfree(wdata);
2578                        add_credits_and_wake_if(server, credits, 0);
2579                        break;
2580                }
2581
2582                /*
2583                 * Bring nr_pages down to the number of pages we actually used,
2584                 * and free any pages that we didn't use.
2585                 */
2586                for ( ; nr_pages > num_pages; nr_pages--)
2587                        put_page(wdata->pages[nr_pages - 1]);
2588
2589                wdata->sync_mode = WB_SYNC_ALL;
2590                wdata->nr_pages = nr_pages;
2591                wdata->offset = (__u64)offset;
2592                wdata->cfile = cifsFileInfo_get(open_file);
2593                wdata->pid = pid;
2594                wdata->bytes = cur_len;
2595                wdata->pagesz = PAGE_SIZE;
2596                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2597                wdata->credits = credits;
2598                wdata->ctx = ctx;
2599                kref_get(&ctx->refcount);
2600
2601                if (!wdata->cfile->invalidHandle ||
2602                    !(rc = cifs_reopen_file(wdata->cfile, false)))
2603                        rc = server->ops->async_writev(wdata,
2604                                        cifs_uncached_writedata_release);
2605                if (rc) {
2606                        add_credits_and_wake_if(server, wdata->credits, 0);
2607                        kref_put(&wdata->refcount,
2608                                 cifs_uncached_writedata_release);
2609                        if (rc == -EAGAIN) {
2610                                *from = saved_from;
2611                                iov_iter_advance(from, offset - saved_offset);
2612                                continue;
2613                        }
2614                        break;
2615                }
2616
2617                list_add_tail(&wdata->list, wdata_list);
2618                offset += cur_len;
2619                len -= cur_len;
2620        } while (len > 0);
2621
2622        return rc;
2623}
2624
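/*
 * Completion side of the uncached write path: walk ctx->list in offset
 * order, accumulate the byte counts of completed writes, resend any
 * chunk that failed with -EAGAIN, and finally report the total (or the
 * first error) through ctx->rc to the waiting or async caller.
 */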
2625static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2626{
2627        struct cifs_writedata *wdata, *tmp;
2628        struct cifs_tcon *tcon;
2629        struct cifs_sb_info *cifs_sb;
2630        struct dentry *dentry = ctx->cfile->dentry;
2631        unsigned int i;
2632        int rc;
2633
2634        tcon = tlink_tcon(ctx->cfile->tlink);
2635        cifs_sb = CIFS_SB(dentry->d_sb);
2636
2637        mutex_lock(&ctx->aio_mutex);
2638
2639        if (list_empty(&ctx->list)) {
2640                mutex_unlock(&ctx->aio_mutex);
2641                return;
2642        }
2643
2644        rc = ctx->rc;
2645        /*
2646         * Wait for and collect replies for any successful sends in order of
2647         * increasing offset. Once an error is hit, then return without waiting
2648         * for any more replies.
2649         */
2650restart_loop:
2651        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2652                if (!rc) {
2653                        if (!try_wait_for_completion(&wdata->done)) {
2654                                mutex_unlock(&ctx->aio_mutex);
2655                                return;
2656                        }
2657
2658                        if (wdata->result)
2659                                rc = wdata->result;
2660                        else
2661                                ctx->total_len += wdata->bytes;
2662
2663                        /* resend call if it's a retryable error */
2664                        if (rc == -EAGAIN) {
2665                                struct list_head tmp_list;
2666                                struct iov_iter tmp_from = ctx->iter;
2667
2668                                INIT_LIST_HEAD(&tmp_list);
2669                                list_del_init(&wdata->list);
2670
2671                                iov_iter_advance(&tmp_from,
2672                                                 wdata->offset - ctx->pos);
2673
2674                                rc = cifs_write_from_iter(wdata->offset,
2675                                                wdata->bytes, &tmp_from,
2676                                                ctx->cfile, cifs_sb, &tmp_list,
2677                                                ctx);
2678
2679                                list_splice(&tmp_list, &ctx->list);
2680
2681                                kref_put(&wdata->refcount,
2682                                         cifs_uncached_writedata_release);
2683                                goto restart_loop;
2684                        }
2685                }
2686                list_del_init(&wdata->list);
2687                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2688        }
2689
2690        for (i = 0; i < ctx->npages; i++)
2691                put_page(ctx->bv[i].bv_page);
2692
2693        cifs_stats_bytes_written(tcon, ctx->total_len);
2694        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2695
2696        ctx->rc = (rc == 0) ? ctx->total_len : rc;
2697
2698        mutex_unlock(&ctx->aio_mutex);
2699
2700        if (ctx->iocb && ctx->iocb->ki_complete)
2701                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2702        else
2703                complete(&ctx->done);
2704}
2705
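/*
 * ->write_iter() for uncached writes: wraps the request in a
 * cifs_aio_ctx, fires off the chunked writes, and either waits for
 * completion (sync kiocb) or returns -EIOCBQUEUED and lets the
 * completion path finish the iocb.
 */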
2706ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2707{
2708        struct file *file = iocb->ki_filp;
2709        ssize_t total_written = 0;
2710        struct cifsFileInfo *cfile;
2711        struct cifs_tcon *tcon;
2712        struct cifs_sb_info *cifs_sb;
2713        struct cifs_aio_ctx *ctx;
2714        struct iov_iter saved_from = *from;
2715        int rc;
2716
2717        /*
2718         * BB - optimize this path for the case when signing is disabled. We can
2719         * drop this extra memory-to-memory copying and use iovec buffers for
2720         * constructing the write request.
2721         */
2722
2723        rc = generic_write_checks(iocb, from);
2724        if (rc <= 0)
2725                return rc;
2726
2727        cifs_sb = CIFS_FILE_SB(file);
2728        cfile = file->private_data;
2729        tcon = tlink_tcon(cfile->tlink);
2730
2731        if (!tcon->ses->server->ops->async_writev)
2732                return -ENOSYS;
2733
2734        ctx = cifs_aio_ctx_alloc();
2735        if (!ctx)
2736                return -ENOMEM;
2737
2738        ctx->cfile = cifsFileInfo_get(cfile);
2739
2740        if (!is_sync_kiocb(iocb))
2741                ctx->iocb = iocb;
2742
2743        ctx->pos = iocb->ki_pos;
2744
2745        rc = setup_aio_ctx_iter(ctx, from, WRITE);
2746        if (rc) {
2747                kref_put(&ctx->refcount, cifs_aio_ctx_release);
2748                return rc;
2749        }
2750
2751        /* grab a lock here because the write response handlers can access ctx */
2752        mutex_lock(&ctx->aio_mutex);
2753
2754        rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2755                                  cfile, cifs_sb, &ctx->list, ctx);
2756
2757        /*
2758         * If at least one write was successfully sent, then discard any rc
2759         * value from the later writes. If a later write succeeds, then
2760         * we'll end up returning whatever was written. If it fails, then
2761         * we'll get a new rc value from that.
2762         */
2763        if (!list_empty(&ctx->list))
2764                rc = 0;
2765
2766        mutex_unlock(&ctx->aio_mutex);
2767
2768        if (rc) {
2769                kref_put(&ctx->refcount, cifs_aio_ctx_release);
2770                return rc;
2771        }
2772
2773        if (!is_sync_kiocb(iocb)) {
2774                kref_put(&ctx->refcount, cifs_aio_ctx_release);
2775                return -EIOCBQUEUED;
2776        }
2777
2778        rc = wait_for_completion_killable(&ctx->done);
2779        if (rc) {
2780                mutex_lock(&ctx->aio_mutex);
2781                ctx->rc = rc = -EINTR;
2782                total_written = ctx->total_len;
2783                mutex_unlock(&ctx->aio_mutex);
2784        } else {
2785                rc = ctx->rc;
2786                total_written = ctx->total_len;
2787        }
2788
2789        kref_put(&ctx->refcount, cifs_aio_ctx_release);
2790
2791        if (unlikely(!total_written))
2792                return rc;
2793
2794        iocb->ki_pos += total_written;
2795        return total_written;
2796}
2797
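/*
 * Cached write used when we hold a write oplock/lease: goes through the
 * page cache via __generic_file_write_iter(), but only after checking
 * under lock_sem that no mandatory brlock conflicts with the range.
 */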
2798static ssize_t
2799cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2800{
2801        struct file *file = iocb->ki_filp;
2802        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2803        struct inode *inode = file->f_mapping->host;
2804        struct cifsInodeInfo *cinode = CIFS_I(inode);
2805        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2806        ssize_t rc;
2807
2808        inode_lock(inode);
2809        /*
2810         * We need to hold the sem to be sure nobody modifies the lock list
2811         * with a brlock that prevents writing.
2812         */
2813        down_read(&cinode->lock_sem);
2814
2815        rc = generic_write_checks(iocb, from);
2816        if (rc <= 0)
2817                goto out;
2818
2819        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2820                                     server->vals->exclusive_lock_type, NULL,
2821                                     CIFS_WRITE_OP))
2822                rc = __generic_file_write_iter(iocb, from);
2823        else
2824                rc = -EACCES;
2825out:
2826        up_read(&cinode->lock_sem);
2827        inode_unlock(inode);
2828
2829        if (rc > 0)
2830                rc = generic_write_sync(iocb, rc);
2831        return rc;
2832}
2833
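/*
 * ->write_iter() for strict cache mode: pick the cached path when we can
 * cache writes (or POSIX locking makes it safe), otherwise fall back to
 * the uncached path and invalidate the mapping afterwards.
 */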
2834ssize_t
2835cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2836{
2837        struct inode *inode = file_inode(iocb->ki_filp);
2838        struct cifsInodeInfo *cinode = CIFS_I(inode);
2839        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2840        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2841                                                iocb->ki_filp->private_data;
2842        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2843        ssize_t written;
2844
2845        written = cifs_get_writer(cinode);
2846        if (written)
2847                return written;
2848
2849        if (CIFS_CACHE_WRITE(cinode)) {
2850                if (cap_unix(tcon->ses) &&
2851                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2852                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2853                        written = generic_file_write_iter(iocb, from);
2854                        goto out;
2855                }
2856                written = cifs_writev(iocb, from);
2857                goto out;
2858        }
2859        /*
2860         * For non-oplocked files in strict cache mode we need to write the data
2861         * to the server exactly from pos to pos+len-1 rather than flush all
2862         * affected pages, because flushing may cause an error with mandatory
2863         * locks on these pages but not on the region from pos to pos+len-1.
2864         */
2865        written = cifs_user_writev(iocb, from);
2866        if (written > 0 && CIFS_CACHE_READ(cinode)) {
2867                /*
2868                 * A Windows 7 server can delay breaking a level2 oplock when a
2869                 * write request comes in - break it on the client to prevent
2870                 * reading stale data.
2871                 */
2872                cifs_zap_mapping(inode);
2873                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2874                         inode);
2875                cinode->oplock = 0;
2876        }
2877out:
2878        cifs_put_writer(cinode);
2879        return written;
2880}
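
    /*
     * Descriptive note: cifs_strict_writev() above picks one of three
     * write paths:
     *
     *        writeback cached (CIFS_CACHE_WRITE) + POSIX byte-range locks
     *                -> generic_file_write_iter()
     *        writeback cached, mandatory locking
     *                -> cifs_writev()
     *        no write oplock (strict cache)
     *                -> cifs_user_writev(), then zap the mapping and drop
     *                   the oplock if reads were cached
     */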
2881
2882static struct cifs_readdata *
2883cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2884{
2885        struct cifs_readdata *rdata;
2886
2887        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2888                        GFP_KERNEL);
2889        if (rdata != NULL) {
2890                kref_init(&rdata->refcount);
2891                INIT_LIST_HEAD(&rdata->list);
2892                init_completion(&rdata->done);
2893                INIT_WORK(&rdata->work, complete);
2894        }
2895
2896        return rdata;
2897}
2898
2899void
2900cifs_readdata_release(struct kref *refcount)
2901{
2902        struct cifs_readdata *rdata = container_of(refcount,
2903                                        struct cifs_readdata, refcount);
2904#ifdef CONFIG_CIFS_SMB_DIRECT
2905        if (rdata->mr) {
2906                smbd_deregister_mr(rdata->mr);
2907                rdata->mr = NULL;
2908        }
2909#endif
2910        if (rdata->cfile)
2911                cifsFileInfo_put(rdata->cfile);
2912
2913        kfree(rdata);
2914}
2915
2916static int
2917cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2918{
2919        int rc = 0;
2920        struct page *page;
2921        unsigned int i;
2922
2923        for (i = 0; i < nr_pages; i++) {
2924                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2925                if (!page) {
2926                        rc = -ENOMEM;
2927                        break;
2928                }
2929                rdata->pages[i] = page;
2930        }
2931
2932        if (rc) {
                    /* release only the pages allocated before the failure */
2933                while (i--) {
2934                        put_page(rdata->pages[i]);
2935                        rdata->pages[i] = NULL;
2936                }
2937        }
2938        return rc;
2939}
2940
2941static void
2942cifs_uncached_readdata_release(struct kref *refcount)
2943{
2944        struct cifs_readdata *rdata = container_of(refcount,
2945                                        struct cifs_readdata, refcount);
2946        unsigned int i;
2947
2948        kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
2949        for (i = 0; i < rdata->nr_pages; i++) {
2950                put_page(rdata->pages[i]);
2951                rdata->pages[i] = NULL;
2952        }
2953        cifs_readdata_release(refcount);
2954}
2955
2956/**
2957 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2958 * @rdata:      the readdata response with list of pages holding data
2959 * @iter:       destination for our data
2960 *
2961 * This function copies data from a list of pages in a readdata response into
2962 * an array of iovecs. It will first calculate where the data should go
2963 * based on the info in the readdata and then copy the data into that spot.
2964 */
2965static int
2966cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2967{
2968        size_t remaining = rdata->got_bytes;
2969        unsigned int i;
2970
2971        for (i = 0; i < rdata->nr_pages; i++) {
2972                struct page *page = rdata->pages[i];
2973                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2974                size_t written;
2975
2976                if (unlikely(iter->type & ITER_PIPE)) {
2977                        void *addr = kmap_atomic(page);
2978
2979                        written = copy_to_iter(addr, copy, iter);
2980                        kunmap_atomic(addr);
2981                } else
2982                        written = copy_page_to_iter(page, 0, copy, iter);
2983                remaining -= written;
2984                if (written < copy && iov_iter_count(iter) > 0)
2985                        break;
2986        }
2987        return remaining ? -EFAULT : 0;
2988}
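
    /*
     * Worked example for cifs_readdata_to_iov() (illustration, assuming
     * 4 KiB pages): with got_bytes == 5000 and two pages, iteration 0
     * copies min(5000, 4096) == 4096 bytes from page 0 and iteration 1
     * copies the remaining 904 bytes from page 1, so remaining reaches 0
     * and the function returns 0. A copy shorter than requested while
     * iovec space is still left indicates a fault in the destination
     * buffer, hence the -EFAULT.
     */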
2989
2990static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
2991
2992static void
2993cifs_uncached_readv_complete(struct work_struct *work)
2994{
2995        struct cifs_readdata *rdata = container_of(work,
2996                                                struct cifs_readdata, work);
2997
2998        complete(&rdata->done);
2999        collect_uncached_read_data(rdata->ctx);
3000        /* the call below can free the last reference to the aio ctx */
3001        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3002}
3003
3004static int
3005uncached_fill_pages(struct TCP_Server_Info *server,
3006                    struct cifs_readdata *rdata, struct iov_iter *iter,
3007                    unsigned int len)
3008{
3009        int result = 0;
3010        unsigned int i;
3011        unsigned int nr_pages = rdata->nr_pages;
3012
3013        rdata->got_bytes = 0;
3014        rdata->tailsz = PAGE_SIZE;
3015        for (i = 0; i < nr_pages; i++) {
3016                struct page *page = rdata->pages[i];
3017                size_t n;
3018
3019                if (len <= 0) {
3020                        /* no need to hold page hostage */
3021                        rdata->pages[i] = NULL;
3022                        rdata->nr_pages--;
3023                        put_page(page);
3024                        continue;
3025                }
3026                n = len;
3027                if (len >= PAGE_SIZE) {
3028                        /* enough data to fill the page */
3029                        n = PAGE_SIZE;
3030                        len -= n;
3031                } else {
3032                        zero_user(page, len, PAGE_SIZE - len);
3033                        rdata->tailsz = len;
3034                        len = 0;
3035                }
3036                if (iter)
3037                        result = copy_page_from_iter(page, 0, n, iter);
3038#ifdef CONFIG_CIFS_SMB_DIRECT
3039                else if (rdata->mr)
3040                        result = n;
3041#endif
3042                else
3043                        result = cifs_read_page_from_socket(server, page, n);
3044                if (result < 0)
3045                        break;
3046
3047                rdata->got_bytes += result;
3048        }
3049
3050        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3051                                                rdata->got_bytes : result;
3052}
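
    /*
     * Worked example for uncached_fill_pages() (illustration, assuming
     * 4 KiB pages): with len == 6000 and two pages, page 0 is filled
     * completely (n == 4096, len becomes 1904), page 1 receives the
     * 1904-byte tail and is zero-padded, and rdata->tailsz ends up as
     * 1904. Any further pages would be released immediately because len
     * has dropped to zero.
     */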
3053
3054static int
3055cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3056                              struct cifs_readdata *rdata, unsigned int len)
3057{
3058        return uncached_fill_pages(server, rdata, NULL, len);
3059}
3060
3061static int
3062cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3063                              struct cifs_readdata *rdata,
3064                              struct iov_iter *iter)
3065{
3066        return uncached_fill_pages(server, rdata, iter, iter->count);
3067}
3068
3069static int
3070cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3071                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3072                     struct cifs_aio_ctx *ctx)
3073{
3074        struct cifs_readdata *rdata;
3075        unsigned int npages, rsize, credits;
3076        size_t cur_len;
3077        int rc;
3078        pid_t pid;
3079        struct TCP_Server_Info *server;
3080
3081        server = tlink_tcon(open_file->tlink)->ses->server;
3082
3083        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3084                pid = open_file->pid;
3085        else
3086                pid = current->tgid;
3087
3088        do {
3089                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3090                                                   &rsize, &credits);
3091                if (rc)
3092                        break;
3093
3094                cur_len = min_t(const size_t, len, rsize);
3095                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3096
3097                /* allocate a readdata struct */
3098                rdata = cifs_readdata_alloc(npages,
3099                                            cifs_uncached_readv_complete);
3100                if (!rdata) {
3101                        add_credits_and_wake_if(server, credits, 0);
3102                        rc = -ENOMEM;
3103                        break;
3104                }
3105
3106                rc = cifs_read_allocate_pages(rdata, npages);
3107                if (rc)
3108                        goto error;
3109
3110                rdata->cfile = cifsFileInfo_get(open_file);
3111                rdata->nr_pages = npages;
3112                rdata->offset = offset;
3113                rdata->bytes = cur_len;
3114                rdata->pid = pid;
3115                rdata->pagesz = PAGE_SIZE;
3116                rdata->read_into_pages = cifs_uncached_read_into_pages;
3117                rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3118                rdata->credits = credits;
3119                rdata->ctx = ctx;
3120                kref_get(&ctx->refcount);
3121
3122                if (!rdata->cfile->invalidHandle ||
3123                    !(rc = cifs_reopen_file(rdata->cfile, true)))
3124                        rc = server->ops->async_readv(rdata);
3125error:
3126                if (rc) {
3127                        add_credits_and_wake_if(server, rdata->credits, 0);
3128                        kref_put(&rdata->refcount,
3129                                 cifs_uncached_readdata_release);
3130                        if (rc == -EAGAIN)
3131                                continue;
3132                        break;
3133                }
3134
3135                list_add_tail(&rdata->list, rdata_list);
3136                offset += cur_len;
3137                len -= cur_len;
3138        } while (len > 0);
3139
3140        return rc;
3141}
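
    /*
     * The loop above follows the usual cifs credit pattern; a minimal
     * sketch of that protocol (illustration only, error handling and
     * locking elided; build_and_send() stands in for async_readv()):
     *
     *        while (work_left) {
     *                wait_mtu_credits(server, want, &got, &credits);
     *                rc = build_and_send(...);   // consumes the credits
     *                if (rc)                     // a failed send must return
     *                        add_credits_and_wake_if(server, credits, 0);
     *        }                                   // its credits to the pool
     */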
3142
3143static void
3144collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3145{
3146        struct cifs_readdata *rdata, *tmp;
3147        struct iov_iter *to = &ctx->iter;
3148        struct cifs_sb_info *cifs_sb;
3149        struct cifs_tcon *tcon;
3150        unsigned int i;
3151        int rc;
3152
3153        tcon = tlink_tcon(ctx->cfile->tlink);
3154        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3155
3156        mutex_lock(&ctx->aio_mutex);
3157
3158        if (list_empty(&ctx->list)) {
3159                mutex_unlock(&ctx->aio_mutex);
3160                return;
3161        }
3162
3163        rc = ctx->rc;
3164        /* the loop below should proceed in the order of increasing offsets */
3165again:
3166        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3167                if (!rc) {
3168                        if (!try_wait_for_completion(&rdata->done)) {
3169                                mutex_unlock(&ctx->aio_mutex);
3170                                return;
3171                        }
3172
3173                        if (rdata->result == -EAGAIN) {
3174                                /* resend call if it's a retryable error */
3175                                struct list_head tmp_list;
3176                                unsigned int got_bytes = rdata->got_bytes;
3177
3178                                list_del_init(&rdata->list);
3179                                INIT_LIST_HEAD(&tmp_list);
3180
3181                                /*
3182                                 * We received part of the data and then a
3183                                 * reconnect happened -- copy what we have
3184                                 * into the buffer and continue reading.
3185                                 */
3186                                if (got_bytes && got_bytes < rdata->bytes) {
3187                                        rc = cifs_readdata_to_iov(rdata, to);
3188                                        if (rc) {
3189                                                kref_put(&rdata->refcount,
3190                                                cifs_uncached_readdata_release);
3191                                                continue;
3192                                        }
3193                                }
3194
3195                                rc = cifs_send_async_read(
3196                                                rdata->offset + got_bytes,
3197                                                rdata->bytes - got_bytes,
3198                                                rdata->cfile, cifs_sb,
3199                                                &tmp_list, ctx);
3200
3201                                list_splice(&tmp_list, &ctx->list);
3202
3203                                kref_put(&rdata->refcount,
3204                                         cifs_uncached_readdata_release);
3205                                goto again;
3206                        } else if (rdata->result)
3207                                rc = rdata->result;
3208                        else
3209                                rc = cifs_readdata_to_iov(rdata, to);
3210
3211                        /* if there was a short read -- discard anything left */
3212                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3213                                rc = -ENODATA;
3214                }
3215                list_del_init(&rdata->list);
3216                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3217        }
3218
3219        for (i = 0; i < ctx->npages; i++) {
3220                if (ctx->should_dirty)
3221                        set_page_dirty(ctx->bv[i].bv_page);
3222                put_page(ctx->bv[i].bv_page);
3223        }
3224
3225        ctx->total_len = ctx->len - iov_iter_count(to);
3226
3227        cifs_stats_bytes_read(tcon, ctx->total_len);
3228
3229        /* mask nodata case */
3230        if (rc == -ENODATA)
3231                rc = 0;
3232
3233        ctx->rc = (rc == 0) ? ctx->total_len : rc;
3234
3235        mutex_unlock(&ctx->aio_mutex);
3236
3237        if (ctx->iocb && ctx->iocb->ki_complete)
3238                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3239        else
3240                complete(&ctx->done);
3241}
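
    /*
     * Flow sketch for the -EAGAIN resend handled above (illustration):
     *
     *        async read completes with rdata->result == -EAGAIN
     *          -> any bytes already received are copied into the user iovec
     *          -> cifs_send_async_read() reissues the remainder starting at
     *             rdata->offset + got_bytes
     *          -> the new rdatas are spliced into ctx->list and the list
     *             walk restarts from the top (goto again)
     */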
3242
3243ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3244{
3245        struct file *file = iocb->ki_filp;
3246        ssize_t rc;
3247        size_t len;
3248        ssize_t total_read = 0;
3249        loff_t offset = iocb->ki_pos;
3250        struct cifs_sb_info *cifs_sb;
3251        struct cifs_tcon *tcon;
3252        struct cifsFileInfo *cfile;
3253        struct cifs_aio_ctx *ctx;
3254
3255        len = iov_iter_count(to);
3256        if (!len)
3257                return 0;
3258
3259        cifs_sb = CIFS_FILE_SB(file);
3260        cfile = file->private_data;
3261        tcon = tlink_tcon(cfile->tlink);
3262
3263        if (!tcon->ses->server->ops->async_readv)
3264                return -ENOSYS;
3265
3266        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3267                cifs_dbg(FYI, "attempting read on write only file instance\n");
3268
3269        ctx = cifs_aio_ctx_alloc();
3270        if (!ctx)
3271                return -ENOMEM;
3272
3273        ctx->cfile = cifsFileInfo_get(cfile);
3274
3275        if (!is_sync_kiocb(iocb))
3276                ctx->iocb = iocb;
3277
3278        if (to->type == ITER_IOVEC)
3279                ctx->should_dirty = true;
3280
3281        rc = setup_aio_ctx_iter(ctx, to, READ);
3282        if (rc) {
3283                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3284                return rc;
3285        }
3286
3287        len = ctx->len;
3288
3289        /* grab the lock here because read response handlers can access ctx */
3290        mutex_lock(&ctx->aio_mutex);
3291
3292        rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3293
3294        /* if at least one read request was sent successfully, reset rc */
3295        if (!list_empty(&ctx->list))
3296                rc = 0;
3297
3298        mutex_unlock(&ctx->aio_mutex);
3299
3300        if (rc) {
3301                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3302                return rc;
3303        }
3304
3305        if (!is_sync_kiocb(iocb)) {
3306                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3307                return -EIOCBQUEUED;
3308        }
3309
3310        rc = wait_for_completion_killable(&ctx->done);
3311        if (rc) {
3312                mutex_lock(&ctx->aio_mutex);
3313                ctx->rc = rc = -EINTR;
3314                total_read = ctx->total_len;
3315                mutex_unlock(&ctx->aio_mutex);
3316        } else {
3317                rc = ctx->rc;
3318                total_read = ctx->total_len;
3319        }
3320
3321        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3322
3323        if (total_read) {
3324                iocb->ki_pos += total_read;
3325                return total_read;
3326        }
3327        return rc;
3328}
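
    /*
     * Caller-visible behaviour (hypothetical userspace, for illustration):
     * a plain read(2)/pread(2) arrives here with a synchronous kiocb, so
     * we wait on ctx->done and return the byte count; an aio read
     * submitted through io_submit(2) takes the -EIOCBQUEUED path above
     * and the result is delivered later via ctx->iocb->ki_complete():
     *
     *        n = pread(fd, buf, len, off);        // blocks in this function
     *        io_submit(aio_ctx, 1, &iocbp);       // returns once queued
     */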
3329
3330ssize_t
3331cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3332{
3333        struct inode *inode = file_inode(iocb->ki_filp);
3334        struct cifsInodeInfo *cinode = CIFS_I(inode);
3335        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3336        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3337                                                iocb->ki_filp->private_data;
3338        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3339        int rc = -EACCES;
3340
3341        /*
3342         * In strict cache mode we need to read from the server every time
3343         * if we don't have a level II oplock, because the server can delay
3344         * an mtime change - so we can't decide whether to invalidate the
3345         * inode. We can also fail reading pages if there are mandatory locks
3346         * on pages affected by this read but not on the region from pos to
3347         * pos+len-1.
3348         */
3349        if (!CIFS_CACHE_READ(cinode))
3350                return cifs_user_readv(iocb, to);
3351
3352        if (cap_unix(tcon->ses) &&
3353            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3354            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3355                return generic_file_read_iter(iocb, to);
3356
3357        /*
3358         * We need to hold the sem to be sure nobody modifies the lock
3359         * list with a brlock that prevents reading.
3360         */
3361        down_read(&cinode->lock_sem);
3362        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3363                                     tcon->ses->server->vals->shared_lock_type,
3364                                     NULL, CIFS_READ_OP))
3365                rc = generic_file_read_iter(iocb, to);
3366        up_read(&cinode->lock_sem);
3367        return rc;
3368}
3369
3370static ssize_t
3371cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3372{
3373        int rc = -EACCES;
3374        unsigned int bytes_read = 0;
3375        unsigned int total_read;
3376        unsigned int current_read_size;
3377        unsigned int rsize;
3378        struct cifs_sb_info *cifs_sb;
3379        struct cifs_tcon *tcon;
3380        struct TCP_Server_Info *server;
3381        unsigned int xid;
3382        char *cur_offset;
3383        struct cifsFileInfo *open_file;
3384        struct cifs_io_parms io_parms;
3385        int buf_type = CIFS_NO_BUFFER;
3386        __u32 pid;
3387
3388        xid = get_xid();
3389        cifs_sb = CIFS_FILE_SB(file);
3390
3391        /* FIXME: set up handlers for larger reads and/or convert to async */
3392        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3393
3394        if (file->private_data == NULL) {
3395                rc = -EBADF;
3396                free_xid(xid);
3397                return rc;
3398        }
3399        open_file = file->private_data;
3400        tcon = tlink_tcon(open_file->tlink);
3401        server = tcon->ses->server;
3402
3403        if (!server->ops->sync_read) {
3404                free_xid(xid);
3405                return -ENOSYS;
3406        }
3407
3408        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3409                pid = open_file->pid;
3410        else
3411                pid = current->tgid;
3412
3413        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3414                cifs_dbg(FYI, "attempting read on write only file instance\n");
3415
3416        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3417             total_read += bytes_read, cur_offset += bytes_read) {
3418                do {
3419                        current_read_size = min_t(uint, read_size - total_read,
3420                                                  rsize);
3421                        /*
3422                         * For Windows ME and 9x we do not want to request
3423                         * more than the server negotiated, since it will
3424                         * refuse the read otherwise.
3425                         */
3426                        if ((tcon->ses) && !(tcon->ses->capabilities &
3427                                tcon->ses->server->vals->cap_large_files)) {
3428                                current_read_size = min_t(uint,
3429                                        current_read_size, CIFSMaxBufSize);
3430                        }
3431                        if (open_file->invalidHandle) {
3432                                rc = cifs_reopen_file(open_file, true);
3433                                if (rc != 0)
3434                                        break;
3435                        }
3436                        io_parms.pid = pid;
3437                        io_parms.tcon = tcon;
3438                        io_parms.offset = *offset;
3439                        io_parms.length = current_read_size;
3440                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3441                                                    &bytes_read, &cur_offset,
3442                                                    &buf_type);
3443                } while (rc == -EAGAIN);
3444
3445                if (rc || (bytes_read == 0)) {
3446                        if (total_read) {
3447                                break;
3448                        } else {
3449                                free_xid(xid);
3450                                return rc;
3451                        }
3452                } else {
3453                        cifs_stats_bytes_read(tcon, total_read);
3454                        *offset += bytes_read;
3455                }
3456        }
3457        free_xid(xid);
3458        return total_read;
3459}
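
    /*
     * Worked example for the loop above (illustration, assuming
     * rsize == 16 KiB): a 100 KiB cifs_read() issues six full 16 KiB
     * sync_read calls plus one final 4 KiB call, advancing *offset and
     * cur_offset by bytes_read each time. A failed or zero-byte read
     * after some progress returns the bytes read so far rather than an
     * error.
     */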
3460
3461/*
3462 * If the page is mmap'ed into a process' page tables, then we need to make
3463 * sure that it doesn't change while being written back.
3464 */
3465static vm_fault_t
3466cifs_page_mkwrite(struct vm_fault *vmf)
3467{
3468        struct page *page = vmf->page;
3469
3470        lock_page(page);
3471        return VM_FAULT_LOCKED;
3472}
3473
3474static const struct vm_operations_struct cifs_file_vm_ops = {
3475        .fault = filemap_fault,
3476        .map_pages = filemap_map_pages,
3477        .page_mkwrite = cifs_page_mkwrite,
3478};
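
    /*
     * Illustration (hypothetical userspace): the first store through a
     * shared writable mapping faults the page in via filemap_fault() and
     * then calls cifs_page_mkwrite(), which locks the page so it cannot
     * change while being written back:
     *
     *        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
     *        p[0] = 'x';        // .fault, then .page_mkwrite on first store
     */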
3479
3480int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3481{
3482        int xid, rc = 0;
3483        struct inode *inode = file_inode(file);
3484
3485        xid = get_xid();
3486
3487        if (!CIFS_CACHE_READ(CIFS_I(inode)))
3488                rc = cifs_zap_mapping(inode);
3489        if (!rc)
3490                rc = generic_file_mmap(file, vma);
3491        if (!rc)
3492                vma->vm_ops = &cifs_file_vm_ops;
3493
3494        free_xid(xid);
3495        return rc;
3496}
3497
3498int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3499{
3500        int rc, xid;
3501
3502        xid = get_xid();
3503
3504        rc = cifs_revalidate_file(file);
3505        if (rc)
3506                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3507                         rc);
3508        if (!rc)
3509                rc = generic_file_mmap(file, vma);
3510        if (!rc)
3511                vma->vm_ops = &cifs_file_vm_ops;
3512
3513        free_xid(xid);
3514        return rc;
3515}
3516
3517static void
3518cifs_readv_complete(struct work_struct *work)
3519{
3520        unsigned int i, got_bytes;
3521        struct cifs_readdata *rdata = container_of(work,
3522                                                struct cifs_readdata, work);
3523
3524        got_bytes = rdata->got_bytes;
3525        for (i = 0; i < rdata->nr_pages; i++) {
3526                struct page *page = rdata->pages[i];
3527
3528                lru_cache_add_file(page);
3529
3530                if (rdata->result == 0 ||
3531                    (rdata->result == -EAGAIN && got_bytes)) {
3532                        flush_dcache_page(page);
3533                        SetPageUptodate(page);
3534                }
3535
3536                unlock_page(page);
3537
3538                if (rdata->result == 0 ||
3539                    (rdata->result == -EAGAIN && got_bytes))
3540                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3541
3542                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3543
3544                put_page(page);
3545                rdata->pages[i] = NULL;
3546        }
3547        kref_put(&rdata->refcount, cifs_readdata_release);
3548}
3549
3550static int
3551readpages_fill_pages(struct TCP_Server_Info *server,
3552                     struct cifs_readdata *rdata, struct iov_iter *iter,
3553                     unsigned int len)
3554{
3555        int result = 0;
3556        unsigned int i;
3557        u64 eof;
3558        pgoff_t eof_index;
3559        unsigned int nr_pages = rdata->nr_pages;
3560
3561        /* determine the eof that the server (probably) has */
3562        eof = CIFS_I(rdata->mapping->host)->server_eof;
3563        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3564        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3565
3566        rdata->got_bytes = 0;
3567        rdata->tailsz = PAGE_SIZE;
3568        for (i = 0; i < nr_pages; i++) {
3569                struct page *page = rdata->pages[i];
3570                size_t n = PAGE_SIZE;
3571
3572                if (len >= PAGE_SIZE) {
3573                        len -= PAGE_SIZE;
3574                } else if (len > 0) {
3575                        /* enough for partial page, fill and zero the rest */
3576                        zero_user(page, len, PAGE_SIZE - len);
3577                        n = rdata->tailsz = len;
3578                        len = 0;
3579                } else if (page->index > eof_index) {
3580                        /*
3581                         * The VFS will not try to do readahead past the
3582                         * i_size, but it's possible that we have outstanding
3583                         * writes with gaps in the middle and the i_size hasn't
3584                         * caught up yet. Populate those with zeroed out pages
3585                         * to prevent the VFS from repeatedly attempting to
3586                         * fill them until the writes are flushed.
3587                         */
3588                        zero_user(page, 0, PAGE_SIZE);
3589                        lru_cache_add_file(page);
3590                        flush_dcache_page(page);
3591                        SetPageUptodate(page);
3592                        unlock_page(page);
3593                        put_page(page);
3594                        rdata->pages[i] = NULL;
3595                        rdata->nr_pages--;
3596                        continue;
3597                } else {
3598                        /* no need to hold page hostage */
3599                        lru_cache_add_file(page);
3600                        unlock_page(page);
3601                        put_page(page);
3602                        rdata->pages[i] = NULL;
3603                        rdata->nr_pages--;
3604                        continue;
3605                }
3606
3607                if (iter)
3608                        result = copy_page_from_iter(page, 0, n, iter);
3609#ifdef CONFIG_CIFS_SMB_DIRECT
3610                else if (rdata->mr)
3611                        result = n;
3612#endif
3613                else
3614                        result = cifs_read_page_from_socket(server, page, n);
3615                if (result < 0)
3616                        break;
3617
3618                rdata->got_bytes += result;
3619        }
3620
3621        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3622                                                rdata->got_bytes : result;
3623}
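
    /*
     * Note on the eof_index case above (illustration, assuming 4 KiB
     * pages): if the server's file size is 8192 (eof_index == 1) and
     * readahead hands us pages with indexes 0..3 while the reply covers
     * only 8192 bytes, pages 2 and 3 lie past eof_index, so they are
     * zero-filled and marked uptodate instead of being re-requested
     * while the outstanding writes catch up.
     */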
3624
3625static int
3626cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3627                               struct cifs_readdata *rdata, unsigned int len)
3628{
3629        return readpages_fill_pages(server, rdata, NULL, len);
3630}
3631
3632static int
3633cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3634                               struct cifs_readdata *rdata,
3635                               struct iov_iter *iter)
3636{
3637        return readpages_fill_pages(server, rdata, iter, iter->count);
3638}
3639
3640static int
3641readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3642                    unsigned int rsize, struct list_head *tmplist,
3643                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3644{
3645        struct page *page, *tpage;
3646        unsigned int expected_index;
3647        int rc;
3648        gfp_t gfp = readahead_gfp_mask(mapping);
3649
3650        INIT_LIST_HEAD(tmplist);
3651
3652        page = list_entry(page_list->prev, struct page, lru);
3653
3654        /*
3655         * Lock the page and put it in the cache. Since no one else
3656         * should have access to this page, we're safe to simply set
3657         * PG_locked without checking it first.
3658         */
3659        __SetPageLocked(page);
3660        rc = add_to_page_cache_locked(page, mapping,
3661                                      page->index, gfp);
3662
3663        /* give up if we can't stick it in the cache */
3664        if (rc) {
3665                __ClearPageLocked(page);
3666                return rc;
3667        }
3668
3669        /* move first page to the tmplist */
3670        *offset = (loff_t)page->index << PAGE_SHIFT;
3671        *bytes = PAGE_SIZE;
3672        *nr_pages = 1;
3673        list_move_tail(&page->lru, tmplist);
3674
3675        /* now try and add more pages onto the request */
3676        expected_index = page->index + 1;
3677        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3678                /* discontinuity? */
3679                if (page->index != expected_index)
3680                        break;
3681
3682                /* would this page push the read over the rsize? */
3683                if (*bytes + PAGE_SIZE > rsize)
3684                        break;
3685
3686                __SetPageLocked(page);
3687                if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3688                        __ClearPageLocked(page);
3689                        break;
3690                }
3691                list_move_tail(&page->lru, tmplist);
3692                (*bytes) += PAGE_SIZE;
3693                expected_index++;
3694                (*nr_pages)++;
3695        }
3696        return rc;
3697}
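
    /*
     * Worked example for readpages_get_pages() (illustration, assuming
     * 4 KiB pages and rsize == 16 KiB): given page_list indexes
     * ..., 12, 9, 8, 7 (declining order), one call batches indexes 7-9
     * into tmplist, stops at the discontinuity before 12, and returns
     * nr_pages == 3 and bytes == 12288; a fourth contiguous page would
     * still have fit, since 16384 does not exceed rsize.
     */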
3698
3699static int cifs_readpages(struct file *file, struct address_space *mapping,
3700        struct list_head *page_list, unsigned num_pages)
3701{
3702        int rc;
3703        struct list_head tmplist;
3704        struct cifsFileInfo *open_file = file->private_data;
3705        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3706        struct TCP_Server_Info *server;
3707        pid_t pid;
3708
3709        /*
3710         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3711         * immediately if the cookie is negative.
3712         *
3713         * After this point, every page in the list might have PG_fscache set,
3714         * so we will need to clean that up off of every page we don't use.
3715         */
3716        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3717                                         &num_pages);
3718        if (rc == 0)
3719                return rc;
3720
3721        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3722                pid = open_file->pid;
3723        else
3724                pid = current->tgid;
3725
3726        rc = 0;
3727        server = tlink_tcon(open_file->tlink)->ses->server;
3728
3729        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3730                 __func__, file, mapping, num_pages);
3731
3732        /*
3733         * Start with the page at end of list and move it to private
3734         * list. Do the same with any following pages until we hit
3735         * the rsize limit, hit an index discontinuity, or run out of
3736         * pages. Issue the async read and then start the loop again
3737         * until the list is empty.
3738         *
3739         * Note that list order is important. The page_list is in
3740         * the order of declining indexes. When we put the pages in
3741         * the rdata->pages, then we want them in increasing order.
3742         */
3743        while (!list_empty(page_list)) {
3744                unsigned int i, nr_pages, bytes, rsize;
3745                loff_t offset;
3746                struct page *page, *tpage;
3747                struct cifs_readdata *rdata;
3748                unsigned credits;
3749
3750                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3751                                                   &rsize, &credits);
3752                if (rc)
3753                        break;
3754
3755                /*
3756                 * Give up immediately if rsize is too small to read an entire
3757                 * page. The VFS will fall back to readpage. We should never
3758                 * reach this point however since we set ra_pages to 0 when the
3759                 * rsize is smaller than a cache page.
3760                 */
3761                if (unlikely(rsize < PAGE_SIZE)) {
3762                        add_credits_and_wake_if(server, credits, 0);
3763                        return 0;
3764                }
3765
3766                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3767                                         &nr_pages, &offset, &bytes);
3768                if (rc) {
3769                        add_credits_and_wake_if(server, credits, 0);
3770                        break;
3771                }
3772
3773                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3774                if (!rdata) {
3775                        /* best to give up if we're out of mem */
3776                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3777                                list_del(&page->lru);
3778                                lru_cache_add_file(page);
3779                                unlock_page(page);
3780                                put_page(page);
3781                        }
3782                        rc = -ENOMEM;
3783                        add_credits_and_wake_if(server, credits, 0);
3784                        break;
3785                }
3786
3787                rdata->cfile = cifsFileInfo_get(open_file);
3788                rdata->mapping = mapping;
3789                rdata->offset = offset;
3790                rdata->bytes = bytes;
3791                rdata->pid = pid;
3792                rdata->pagesz = PAGE_SIZE;
3793                rdata->read_into_pages = cifs_readpages_read_into_pages;
3794                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3795                rdata->credits = credits;
3796
3797                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3798                        list_del(&page->lru);
3799                        rdata->pages[rdata->nr_pages++] = page;
3800                }
3801
3802                if (!rdata->cfile->invalidHandle ||
3803                    !(rc = cifs_reopen_file(rdata->cfile, true)))
3804                        rc = server->ops->async_readv(rdata);
3805                if (rc) {
3806                        add_credits_and_wake_if(server, rdata->credits, 0);
3807                        for (i = 0; i < rdata->nr_pages; i++) {
3808                                page = rdata->pages[i];
3809                                lru_cache_add_file(page);
3810                                unlock_page(page);
3811                                put_page(page);
3812                        }
3813                        /* Fall back to readpage in error/reconnect cases */
3814                        kref_put(&rdata->refcount, cifs_readdata_release);
3815                        break;
3816                }
3817
3818                kref_put(&rdata->refcount, cifs_readdata_release);
3819        }
3820
3821        /* Any pages that have been shown to fscache but didn't get added to
3822         * the pagecache must be uncached before they get returned to the
3823         * allocator.
3824         */
3825        cifs_fscache_readpages_cancel(mapping->host, page_list);
3826        return rc;
3827}
3828
3829/*
3830 * cifs_readpage_worker must be called with the page pinned
3831 */
3832static int cifs_readpage_worker(struct file *file, struct page *page,
3833        loff_t *poffset)
3834{
3835        char *read_data;
3836        int rc;
3837
3838        /* Is the page cached? */
3839        rc = cifs_readpage_from_fscache(file_inode(file), page);
3840        if (rc == 0)
3841                goto read_complete;
3842
3843        read_data = kmap(page);
3844        /* for reads over a certain size we could initiate async read-ahead */
3845
3846        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3847
3848        if (rc < 0)
3849                goto io_error;
3850        else
3851                cifs_dbg(FYI, "Bytes read %d\n", rc);
3852
3853        file_inode(file)->i_atime =
3854                current_time(file_inode(file));
3855
3856        if (PAGE_SIZE > rc)
3857                memset(read_data + rc, 0, PAGE_SIZE - rc);
3858
3859        flush_dcache_page(page);
3860        SetPageUptodate(page);
3861
3862        /* send this page to the cache */
3863        cifs_readpage_to_fscache(file_inode(file), page);
3864
3865        rc = 0;
3866
3867io_error:
3868        kunmap(page);
3869        unlock_page(page);
3870
3871read_complete:
3872        return rc;
3873}
3874
3875static int cifs_readpage(struct file *file, struct page *page)
3876{
3877        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3878        int rc = -EACCES;
3879        unsigned int xid;
3880
3881        xid = get_xid();
3882
3883        if (file->private_data == NULL) {
3884                rc = -EBADF;
3885                free_xid(xid);
3886                return rc;
3887        }
3888
3889        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3890                 page, (int)offset, (int)offset);
3891
3892        rc = cifs_readpage_worker(file, page, &offset);
3893
3894        free_xid(xid);
3895        return rc;
3896}
3897
3898static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3899{
3900        struct cifsFileInfo *open_file;
3901        struct cifs_tcon *tcon =
3902                cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3903
3904        spin_lock(&tcon->open_file_lock);
3905        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3906                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3907                        spin_unlock(&tcon->open_file_lock);
3908                        return 1;
3909                }
3910        }
3911        spin_unlock(&tcon->open_file_lock);
3912        return 0;
3913}
3914
3915/* We do not want to update the file size from the server for inodes
3916   open for write - to avoid races with writepage extending the file.
3917   In the future we could consider allowing a refresh of the inode
3918   only on increases in the file size, but this is tricky to do
3919   without racing with writebehind page caching in the current
3920   Linux kernel design */
3921bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3922{
3923        if (!cifsInode)
3924                return true;
3925
3926        if (is_inode_writable(cifsInode)) {
3927                /* This inode is open for write at least once */
3928                struct cifs_sb_info *cifs_sb;
3929
3930                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3931                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3932                        /* since there is no page cache to corrupt on
3933                           direct I/O, we can change the size safely */
3934                        return true;
3935                }
3936
3937                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3938                        return true;
3939
3940                return false;
3941        } else
3942                return true;
3943}
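
    /*
     * Scenario for is_size_safe_to_change() (illustration): a client has
     * extended a file to 10 MiB through the page cache, but the server
     * still reports 8 MiB because writeback has not caught up. With the
     * inode open for write we refuse to shrink i_size to the stale 8 MiB
     * value, yet still accept a genuinely larger end_of_file.
     */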
3944
3945static int cifs_write_begin(struct file *file, struct address_space *mapping,
3946                        loff_t pos, unsigned len, unsigned flags,
3947                        struct page **pagep, void **fsdata)
3948{
3949        int oncethru = 0;
3950        pgoff_t index = pos >> PAGE_SHIFT;
3951        loff_t offset = pos & (PAGE_SIZE - 1);
3952        loff_t page_start = pos & PAGE_MASK;
3953        loff_t i_size;
3954        struct page *page;
3955        int rc = 0;
3956
3957        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3958
3959start:
3960        page = grab_cache_page_write_begin(mapping, index, flags);
3961        if (!page) {
3962                rc = -ENOMEM;
3963                goto out;
3964        }
3965
3966        if (PageUptodate(page))
3967                goto out;
3968
3969        /*
3970         * If we write a full page it will be up to date, no need to read from
3971         * the server. If the write is short, we'll end up doing a sync write
3972         * instead.
3973         */
3974        if (len == PAGE_SIZE)
3975                goto out;
3976
3977        /*
3978         * optimize away the read when we have an oplock, and we're not
3979         * expecting to use any of the data we'd be reading in. That
3980         * is, when the page lies beyond the EOF, or straddles the EOF
3981         * and the write will cover all of the existing data.
3982         */
3983        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3984                i_size = i_size_read(mapping->host);
3985                if (page_start >= i_size ||
3986                    (offset == 0 && (pos + len) >= i_size)) {
3987                        zero_user_segments(page, 0, offset,
3988                                           offset + len,
3989                                           PAGE_SIZE);
3990                        /*
3991                         * PageChecked means that the parts of the page
3992                         * to which we're not writing are considered up
3993                         * to date. Once the data is copied to the
3994                         * page, it can be set uptodate.
3995                         */
3996                        SetPageChecked(page);
3997                        goto out;
3998                }
3999        }
4000
4001        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4002                /*
4003                 * might as well read a page, it is fast enough. If we get
4004                 * an error, we don't need to return it. cifs_write_end will
4005                 * do a sync write instead since PG_uptodate isn't set.
4006                 */
4007                cifs_readpage_worker(file, page, &page_start);
4008                put_page(page);
4009                oncethru = 1;
4010                goto start;
4011        } else {
4012                /* we could try using another file handle if there is one -
4013                   but how would we lock it to prevent a close of that handle
4014                   racing with this read? In any case this page will be
4015                   written out by write_end, so this is fine */
4016        }
4017out:
4018        *pagep = page;
4019        return rc;
4020}
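
    /*
     * Behaviour sketch for cifs_write_begin() (illustration, assuming
     * 4 KiB pages): a full-page write (len == PAGE_SIZE) skips the read
     * entirely; a short write into a page at or beyond EOF with a read
     * oplock zeroes the untouched segments and sets PageChecked; a short
     * write into existing data reads the page in once through
     * cifs_readpage_worker() and then retries from "start".
     */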
4021
4022static int cifs_release_page(struct page *page, gfp_t gfp)
4023{
4024        if (PagePrivate(page))
4025                return 0;
4026
4027        return cifs_fscache_release_page(page, gfp);
4028}
4029
4030static void cifs_invalidate_page(struct page *page, unsigned int offset,
4031                                 unsigned int length)
4032{
4033        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4034
4035        if (offset == 0 && length == PAGE_SIZE)
4036                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4037}
4038
4039static int cifs_launder_page(struct page *page)
4040{
4041        int rc = 0;
4042        loff_t range_start = page_offset(page);
4043        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4044        struct writeback_control wbc = {
4045                .sync_mode = WB_SYNC_ALL,
4046                .nr_to_write = 0,
4047                .range_start = range_start,
4048                .range_end = range_end,
4049        };
4050
4051        cifs_dbg(FYI, "Launder page: %p\n", page);
4052
4053        if (clear_page_dirty_for_io(page))
4054                rc = cifs_writepage_locked(page, &wbc);
4055
4056        cifs_fscache_invalidate_page(page, page->mapping->host);
4057        return rc;
4058}
4059
4060void cifs_oplock_break(struct work_struct *work)
4061{
4062        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4063                                                  oplock_break);
4064        struct inode *inode = d_inode(cfile->dentry);
4065        struct cifsInodeInfo *cinode = CIFS_I(inode);
4066        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4067        struct TCP_Server_Info *server = tcon->ses->server;
4068        int rc = 0;
4069
4070        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4071                        TASK_UNINTERRUPTIBLE);
4072
4073        server->ops->downgrade_oplock(server, cinode,
4074                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4075
4076        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4077                                                cifs_has_mand_locks(cinode)) {
4078                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4079                         inode);
4080                cinode->oplock = 0;
4081        }
4082
4083        if (inode && S_ISREG(inode->i_mode)) {
4084                if (CIFS_CACHE_READ(cinode))
4085                        break_lease(inode, O_RDONLY);
4086                else
4087                        break_lease(inode, O_WRONLY);
4088                rc = filemap_fdatawrite(inode->i_mapping);
4089                if (!CIFS_CACHE_READ(cinode)) {
4090                        rc = filemap_fdatawait(inode->i_mapping);
4091                        mapping_set_error(inode->i_mapping, rc);
4092                        cifs_zap_mapping(inode);
4093                }
4094                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4095        }
4096
4097        rc = cifs_push_locks(cfile);
4098        if (rc)
4099                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4100
4101        /*
4102         * Releasing a stale oplock after a recent reconnect of the SMB
4103         * session, using a now-incorrect file handle, is not a data
4104         * integrity issue, but do not bother sending an oplock release if
4105         * the session is still disconnected - the server already released it
4106         */
4107        if (!cfile->oplock_break_cancelled) {
4108                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4109                                                             cinode);
4110                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4111        }
4112        cifs_done_oplock_break(cinode);
4113}
4114
4115/*
4116 * The presence of cifs_direct_io() in the address space ops vector
4117 * allows the open() O_DIRECT flag, which would have failed otherwise.
4118 *
4119 * In non-cached mode (mount with cache=none), we shunt off direct read
4120 * and write requests, so this method should never be called.
4121 *
4122 * Direct I/O is not yet supported in the cached mode.
4123 */
4124static ssize_t
4125cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4126{
4127        /*
4128         * FIXME
4129         * Eventually need to support direct IO for non forcedirectio mounts
4130         */
4131        return -EINVAL;
4132}
4133
4134
4135const struct address_space_operations cifs_addr_ops = {
4136        .readpage = cifs_readpage,
4137        .readpages = cifs_readpages,
4138        .writepage = cifs_writepage,
4139        .writepages = cifs_writepages,
4140        .write_begin = cifs_write_begin,
4141        .write_end = cifs_write_end,
4142        .set_page_dirty = __set_page_dirty_nobuffers,
4143        .releasepage = cifs_release_page,
4144        .direct_IO = cifs_direct_io,
4145        .invalidatepage = cifs_invalidate_page,
4146        .launder_page = cifs_launder_page,
4147};
4148
4149/*
4150 * cifs_readpages requires the server to support a buffer large enough to
4151 * contain the header plus one complete page of data.  Otherwise, we need
4152 * to leave cifs_readpages out of the address space operations.
4153 */
4154const struct address_space_operations cifs_addr_ops_smallbuf = {
4155        .readpage = cifs_readpage,
4156        .writepage = cifs_writepage,
4157        .writepages = cifs_writepages,
4158        .write_begin = cifs_write_begin,
4159        .write_end = cifs_write_end,
4160        .set_page_dirty = __set_page_dirty_nobuffers,
4161        .releasepage = cifs_release_page,
4162        .invalidatepage = cifs_invalidate_page,
4163        .launder_page = cifs_launder_page,
4164};
4165