linux/fs/cifs/file.c
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


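/*
 * Map the POSIX open flags in O_ACCMODE to the NT desired access bits
 * requested on the wire for a CIFS open.
 */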
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

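/*
 * Translate POSIX open flags into the SMB_O_* flags used by the posix
 * open call of the CIFS Unix extensions.
 */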
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

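/*
 * Map O_CREAT/O_EXCL/O_TRUNC combinations to the matching CIFS create
 * disposition (see the mapping table in cifs_nt_open below).
 */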
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

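/*
 * Open a file using the Unix extensions posix open call. On success the
 * new inode is instantiated (or the existing one refreshed) from the
 * FILE_UNIX_BASIC_INFO data returned by the server.
 */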
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

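/*
 * Open a file the regular (non-posix) way: convert the POSIX open flags
 * to NT desired access and create disposition, issue the open through
 * server->ops->open, then refresh the inode from the returned data.
 */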
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that no POSIX flag maps directly to the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

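/*
 * Return true if any open file handle on this inode holds outstanding
 * mandatory byte-range locks.
 */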
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

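/*
 * Allocate and initialize a cifsFileInfo for a freshly opened handle and
 * link it into the per-tcon and per-inode open file lists.
 */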
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        if (fid->purge_cache)
                cifs_invalidate_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

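/*
 * Take an extra reference on the file private data.
 */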
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

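/*
 * VFS open callback: try a posix open when the server supports the Unix
 * extensions, otherwise fall back to a regular NT-style open, then attach
 * the resulting handle to the struct file.
 */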
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

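/*
 * Reopen a file handle that was invalidated, typically after a reconnect.
 * If can_flush is set, write-behind data is flushed and the inode info is
 * refreshed before the new server file id is recorded.
 */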
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

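/*
 * VFS release callback: drop our reference to the file private data.
 */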
int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

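/*
 * VFS release callback for directories: close any in-progress search on
 * the server and free the search buffer and private data.
 */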
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

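/*
 * Allocate and initialize a cifsLockInfo describing a byte-range lock
 * owned by the current thread group.
 */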
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

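/*
 * Wake up every lock request that is blocked waiting on this lock.
 */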
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

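/*
 * Walk every open file handle on the inode and check whether the given
 * range conflicts with an existing lock; if so, return it via conf_lock.
 */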
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

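/*
 * Add a lock to the list of cached locks for this file handle.
 */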
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

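/*
 * Send all cached mandatory byte-range locks for this file handle to the
 * server, batching as many LOCKING_ANDX ranges per request as the server
 * buffer size allows.
 */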
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

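/*
 * Push all cached posix byte-range locks for this file handle to the
 * server via CIFSSMBPosixLock, preallocating one lock_to_push per lock.
 */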
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = cfile->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        spin_unlock(&inode->i_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&inode->i_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

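/*
 * Push cached byte-range locks to the server, choosing posix or mandatory
 * semantics based on the Unix extension capabilities and mount flags, then
 * stop caching brlocks for this inode.
 */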
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

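/*
 * Decode the fl_flags and fl_type of a file_lock into the lock type used
 * on the wire plus lock/unlock/wait indicators, logging anything we do
 * not handle yet.
 */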
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

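/*
 * Handle F_GETLK: test for a conflicting lock, first locally and then on
 * the server. For mandatory locks the server probe is done by trying to
 * set, and then immediately unset, the lock on the server.
 */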
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

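/*
 * Remove every cached lock that falls inside the unlock range and, when
 * locks are no longer cached, send the matching unlock requests to the
 * server in batches, restoring the locks if a request fails.
 */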
1368int
1369cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1370                  unsigned int xid)
1371{
1372        int rc = 0, stored_rc;
1373        int types[] = {LOCKING_ANDX_LARGE_FILES,
1374                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1375        unsigned int i;
1376        unsigned int max_num, num, max_buf;
1377        LOCKING_ANDX_RANGE *buf, *cur;
1378        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1379        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1380        struct cifsLockInfo *li, *tmp;
1381        __u64 length = 1 + flock->fl_end - flock->fl_start;
1382        struct list_head tmp_llist;
1383
1384        INIT_LIST_HEAD(&tmp_llist);
1385
1386        /*
1387         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1388         * and check it for zero before using.
1389         */
1390        max_buf = tcon->ses->server->maxBuf;
1391        if (!max_buf)
1392                return -EINVAL;
1393
1394        max_num = (max_buf - sizeof(struct smb_hdr)) /
1395                                                sizeof(LOCKING_ANDX_RANGE);
1396        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1397        if (!buf)
1398                return -ENOMEM;
1399
1400        down_write(&cinode->lock_sem);
1401        for (i = 0; i < 2; i++) {
1402                cur = buf;
1403                num = 0;
1404                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1405                        if (flock->fl_start > li->offset ||
1406                            (flock->fl_start + length) <
1407                            (li->offset + li->length))
1408                                continue;
1409                        if (current->tgid != li->pid)
1410                                continue;
1411                        if (types[i] != li->type)
1412                                continue;
1413                        if (cinode->can_cache_brlcks) {
1414                                /*
1415                                 * We can cache brlock requests - simply remove
1416                                 * a lock from the file's list.
1417                                 */
1418                                list_del(&li->llist);
1419                                cifs_del_lock_waiters(li);
1420                                kfree(li);
1421                                continue;
1422                        }
1423                        cur->Pid = cpu_to_le16(li->pid);
1424                        cur->LengthLow = cpu_to_le32((u32)li->length);
1425                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1426                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1427                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1428                        /*
1429                         * We need to save a lock here to let us add it again to
1430                         * the file's list if the unlock range request fails on
1431                         * the server.
1432                         */
1433                        list_move(&li->llist, &tmp_llist);
1434                        if (++num == max_num) {
1435                                stored_rc = cifs_lockv(xid, tcon,
1436                                                       cfile->fid.netfid,
1437                                                       types[i], num, 0, buf);
1438                                if (stored_rc) {
1439                                        /*
1440                                         * We failed on the unlock range
1441                                         * request - add all locks from the tmp
1442                                         * list to the head of the file's list.
1443                                         */
1444                                        cifs_move_llist(&tmp_llist,
1445                                                        &cfile->llist->locks);
1446                                        rc = stored_rc;
1447                                } else
1448                                        /*
1449                                         * The unlock range request succeeded -
1450                                         * free the tmp list.
1451                                         */
1452                                        cifs_free_llist(&tmp_llist);
1453                                cur = buf;
1454                                num = 0;
1455                        } else
1456                                cur++;
1457                }
1458                if (num) {
1459                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1460                                               types[i], num, 0, buf);
1461                        if (stored_rc) {
1462                                cifs_move_llist(&tmp_llist,
1463                                                &cfile->llist->locks);
1464                                rc = stored_rc;
1465                        } else
1466                                cifs_free_llist(&tmp_llist);
1467                }
1468        }
1469
1470        up_write(&cinode->lock_sem);
1471        kfree(buf);
1472        return rc;
1473}
1474
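    /*
     * Set or clear a byte-range lock described by flock. POSIX-capable
     * mounts go through CIFSSMBPosixLock; otherwise a mandatory
     * (Windows-style) lock is validated against the cached lock list and
     * then sent to the server.
     */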
1475static int
1476cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1477           bool wait_flag, bool posix_lck, int lock, int unlock,
1478           unsigned int xid)
1479{
1480        int rc = 0;
1481        __u64 length = 1 + flock->fl_end - flock->fl_start;
1482        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1483        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1484        struct TCP_Server_Info *server = tcon->ses->server;
1485        struct inode *inode = cfile->dentry->d_inode;
1486
1487        if (posix_lck) {
1488                int posix_lock_type;
1489
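                    /*
                     * Try to satisfy the request locally first - a positive
                     * rc means the lock must also be sent to the server.
                     */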
1490                rc = cifs_posix_lock_set(file, flock);
1491                if (!rc || rc < 0)
1492                        return rc;
1493
1494                if (type & server->vals->shared_lock_type)
1495                        posix_lock_type = CIFS_RDLCK;
1496                else
1497                        posix_lock_type = CIFS_WRLCK;
1498
1499                if (unlock == 1)
1500                        posix_lock_type = CIFS_UNLCK;
1501
1502                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1503                                      current->tgid, flock->fl_start, length,
1504                                      NULL, posix_lock_type, wait_flag);
1505                goto out;
1506        }
1507
1508        if (lock) {
1509                struct cifsLockInfo *lock;
1510
1511                lock = cifs_lock_init(flock->fl_start, length, type);
1512                if (!lock)
1513                        return -ENOMEM;
1514
1515                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1516                if (rc < 0) {
1517                        kfree(lock);
1518                        return rc;
1519                }
1520                if (!rc)
1521                        goto out;
1522
1523                /*
1524                 * Windows 7 server can delay breaking lease from read to None
1525                 * if we set a byte-range lock on a file - break it explicitly
1526                 * before sending the lock to the server to be sure the next
1527                 * read won't conflict with non-overlapping locks due to
1528                 * page reading.
1529                 */
1530                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1531                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1532                        cifs_invalidate_mapping(inode);
1533                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1534                                 inode);
1535                        CIFS_I(inode)->oplock = 0;
1536                }
1537
1538                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1539                                            type, 1, 0, wait_flag);
1540                if (rc) {
1541                        kfree(lock);
1542                        return rc;
1543                }
1544
1545                cifs_lock_add(cfile, lock);
1546        } else if (unlock)
1547                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1548
1549out:
1550        if (flock->fl_flags & FL_POSIX)
1551                posix_lock_file_wait(file, flock);
1552        return rc;
1553}
1554
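    /*
     * Entry point for the ->lock() file operation. Decides between POSIX
     * and mandatory byte-range lock semantics and dispatches to
     * cifs_getlk() or cifs_setlk().
     */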
1555int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1556{
1557        int rc, xid;
1558        int lock = 0, unlock = 0;
1559        bool wait_flag = false;
1560        bool posix_lck = false;
1561        struct cifs_sb_info *cifs_sb;
1562        struct cifs_tcon *tcon;
1563        struct cifsInodeInfo *cinode;
1564        struct cifsFileInfo *cfile;
1565        __u16 netfid;
1566        __u32 type;
1567
1568        rc = -EACCES;
1569        xid = get_xid();
1570
1571        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1572                 cmd, flock->fl_flags, flock->fl_type,
1573                 flock->fl_start, flock->fl_end);
1574
1575        cfile = (struct cifsFileInfo *)file->private_data;
1576        tcon = tlink_tcon(cfile->tlink);
1577
1578        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1579                        tcon->ses->server);
1580
1581        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1582        netfid = cfile->fid.netfid;
1583        cinode = CIFS_I(file_inode(file));
1584
1585        if (cap_unix(tcon->ses) &&
1586            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1587            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1588                posix_lck = true;
1589        /*
1590         * BB add code here to normalize offset and length to account for
1591         * negative length, which we cannot accept over the wire.
1592         */
1593        if (IS_GETLK(cmd)) {
1594                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1595                free_xid(xid);
1596                return rc;
1597        }
1598
1599        if (!lock && !unlock) {
1600                /*
1601                 * if this is neither a lock nor an unlock request, we do
1602                 * not know what it is - nothing to do
1603                 */
1604                free_xid(xid);
1605                return -EOPNOTSUPP;
1606        }
1607
1608        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1609                        xid);
1610        free_xid(xid);
1611        return rc;
1612}
1613
1614/*
1615 * Update the file size (if needed) after a write. Should be called with
1616 * the inode->i_lock held
1617 */
1618void
1619cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1620                      unsigned int bytes_written)
1621{
1622        loff_t end_of_write = offset + bytes_written;
1623
1624        if (end_of_write > cifsi->server_eof)
1625                cifsi->server_eof = end_of_write;
1626}
1627
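    /*
     * Write data synchronously at *offset, splitting the request into
     * wsize-sized chunks and reopening the file handle if it has been
     * invalidated. Updates the cached EOF and i_size on success.
     */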
1628static ssize_t
1629cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1630           size_t write_size, loff_t *offset)
1631{
1632        int rc = 0;
1633        unsigned int bytes_written = 0;
1634        unsigned int total_written;
1635        struct cifs_sb_info *cifs_sb;
1636        struct cifs_tcon *tcon;
1637        struct TCP_Server_Info *server;
1638        unsigned int xid;
1639        struct dentry *dentry = open_file->dentry;
1640        struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1641        struct cifs_io_parms io_parms;
1642
1643        cifs_sb = CIFS_SB(dentry->d_sb);
1644
1645        cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
1646                 write_size, *offset, dentry->d_name.name);
1647
1648        tcon = tlink_tcon(open_file->tlink);
1649        server = tcon->ses->server;
1650
1651        if (!server->ops->sync_write)
1652                return -ENOSYS;
1653
1654        xid = get_xid();
1655
1656        for (total_written = 0; write_size > total_written;
1657             total_written += bytes_written) {
1658                rc = -EAGAIN;
1659                while (rc == -EAGAIN) {
1660                        struct kvec iov[2];
1661                        unsigned int len;
1662
1663                        if (open_file->invalidHandle) {
1664                        /* we could deadlock if we called
1665                           filemap_fdatawait from here, so tell
1666                           cifs_reopen_file not to flush data to
1667                           the server now */
1668                                rc = cifs_reopen_file(open_file, false);
1669                                if (rc != 0)
1670                                        break;
1671                        }
1672
1673                        len = min((size_t)cifs_sb->wsize,
1674                                  write_size - total_written);
1675                        /* iov[0] is reserved for smb header */
1676                        iov[1].iov_base = (char *)write_data + total_written;
1677                        iov[1].iov_len = len;
1678                        io_parms.pid = pid;
1679                        io_parms.tcon = tcon;
1680                        io_parms.offset = *offset;
1681                        io_parms.length = len;
1682                        rc = server->ops->sync_write(xid, open_file, &io_parms,
1683                                                     &bytes_written, iov, 1);
1684                }
1685                if (rc || (bytes_written == 0)) {
1686                        if (total_written)
1687                                break;
1688                        else {
1689                                free_xid(xid);
1690                                return rc;
1691                        }
1692                } else {
1693                        spin_lock(&dentry->d_inode->i_lock);
1694                        cifs_update_eof(cifsi, *offset, bytes_written);
1695                        spin_unlock(&dentry->d_inode->i_lock);
1696                        *offset += bytes_written;
1697                }
1698        }
1699
1700        cifs_stats_bytes_written(tcon, total_written);
1701
1702        if (total_written > 0) {
1703                spin_lock(&dentry->d_inode->i_lock);
1704                if (*offset > dentry->d_inode->i_size)
1705                        i_size_write(dentry->d_inode, *offset);
1706                spin_unlock(&dentry->d_inode->i_lock);
1707        }
1708        mark_inode_dirty_sync(dentry->d_inode);
1709        free_xid(xid);
1710        return total_written;
1711}
1712
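    /* Find an open, valid, readable handle for the given inode. */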
1713struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1714                                        bool fsuid_only)
1715{
1716        struct cifsFileInfo *open_file = NULL;
1717        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1718
1719        /* only filter by fsuid on multiuser mounts */
1720        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1721                fsuid_only = false;
1722
1723        spin_lock(&cifs_file_list_lock);
1724        /* we could simply take the first list entry since write-only entries
1725           are always at the end of the list, but the first entry might have
1726           a close pending, so we walk the whole list */
1727        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1728                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1729                        continue;
1730                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1731                        if (!open_file->invalidHandle) {
1732                                /* found a good file */
1733                                /* lock it so it will not be closed on us */
1734                                cifsFileInfo_get_locked(open_file);
1735                                spin_unlock(&cifs_file_list_lock);
1736                                return open_file;
1737                        } /* else might as well continue, and look for
1738                             another, or simply have the caller reopen it
1739                             again rather than trying to fix this handle */
1740                } else /* write only file */
1741                        break; /* write only files are last so must be done */
1742        }
1743        spin_unlock(&cifs_file_list_lock);
1744        return NULL;
1745}
1746
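    /*
     * Find an open, writable handle for the given inode, preferring one
     * owned by the current tgid. Invalid handles are reopened as a last
     * resort, with up to MAX_REOPEN_ATT attempts.
     */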
1747struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1748                                        bool fsuid_only)
1749{
1750        struct cifsFileInfo *open_file, *inv_file = NULL;
1751        struct cifs_sb_info *cifs_sb;
1752        bool any_available = false;
1753        int rc;
1754        unsigned int refind = 0;
1755
1756        /* Having a null inode here (because mapping->host was set to zero by
1757        the VFS or MM) should not happen, but we had reports of an oops (due
1758        to it being zero) during stress test cases, so we need to check for it */
1759
1760        if (cifs_inode == NULL) {
1761                cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1762                dump_stack();
1763                return NULL;
1764        }
1765
1766        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1767
1768        /* only filter by fsuid on multiuser mounts */
1769        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1770                fsuid_only = false;
1771
1772        spin_lock(&cifs_file_list_lock);
1773refind_writable:
1774        if (refind > MAX_REOPEN_ATT) {
1775                spin_unlock(&cifs_file_list_lock);
1776                return NULL;
1777        }
1778        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1779                if (!any_available && open_file->pid != current->tgid)
1780                        continue;
1781                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1782                        continue;
1783                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1784                        if (!open_file->invalidHandle) {
1785                                /* found a good writable file */
1786                                cifsFileInfo_get_locked(open_file);
1787                                spin_unlock(&cifs_file_list_lock);
1788                                return open_file;
1789                        } else {
1790                                if (!inv_file)
1791                                        inv_file = open_file;
1792                        }
1793                }
1794        }
1795        /* couldn't find a usable FH with the same pid, try any available */
1796        if (!any_available) {
1797                any_available = true;
1798                goto refind_writable;
1799        }
1800
1801        if (inv_file) {
1802                any_available = false;
1803                cifsFileInfo_get_locked(inv_file);
1804        }
1805
1806        spin_unlock(&cifs_file_list_lock);
1807
1808        if (inv_file) {
1809                rc = cifs_reopen_file(inv_file, false);
1810                if (!rc)
1811                        return inv_file;
1812                else {
1813                        spin_lock(&cifs_file_list_lock);
1814                        list_move_tail(&inv_file->flist,
1815                                        &cifs_inode->openFileList);
1816                        spin_unlock(&cifs_file_list_lock);
1817                        cifsFileInfo_put(inv_file);
1818                        spin_lock(&cifs_file_list_lock);
1819                        ++refind;
1820                        goto refind_writable;
1821                }
1822        }
1823
1824        return NULL;
1825}
1826
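    /*
     * Write the byte range [from, to) of a single page back to the
     * server using any available writable handle for the inode.
     */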
1827static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1828{
1829        struct address_space *mapping = page->mapping;
1830        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1831        char *write_data;
1832        int rc = -EFAULT;
1833        int bytes_written = 0;
1834        struct inode *inode;
1835        struct cifsFileInfo *open_file;
1836
1837        if (!mapping || !mapping->host)
1838                return -EFAULT;
1839
1840        inode = page->mapping->host;
1841
1842        offset += (loff_t)from;
1843        write_data = kmap(page);
1844        write_data += from;
1845
1846        if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1847                kunmap(page);
1848                return -EIO;
1849        }
1850
1851        /* racing with truncate? */
1852        if (offset > mapping->host->i_size) {
1853                kunmap(page);
1854                return 0; /* don't care */
1855        }
1856
1857        /* check to make sure that we are not extending the file */
1858        if (mapping->host->i_size - offset < (loff_t)to)
1859                to = (unsigned)(mapping->host->i_size - offset);
1860
1861        open_file = find_writable_file(CIFS_I(mapping->host), false);
1862        if (open_file) {
1863                bytes_written = cifs_write(open_file, open_file->pid,
1864                                           write_data, to - from, &offset);
1865                cifsFileInfo_put(open_file);
1866                /* Does mm or vfs already set times? */
1867                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1868                if ((bytes_written > 0) && (offset))
1869                        rc = 0;
1870                else if (bytes_written < 0)
1871                        rc = bytes_written;
1872        } else {
1873                cifs_dbg(FYI, "No writeable filehandles for inode\n");
1874                rc = -EIO;
1875        }
1876
1877        kunmap(page);
1878        return rc;
1879}
1880
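    /*
     * Writeback for the whole mapping: gather runs of contiguous dirty
     * pages, wrap each run in a cifs_writedata and send it to the server
     * with a single async write.
     */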
1881static int cifs_writepages(struct address_space *mapping,
1882                           struct writeback_control *wbc)
1883{
1884        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1885        bool done = false, scanned = false, range_whole = false;
1886        pgoff_t end, index;
1887        struct cifs_writedata *wdata;
1888        struct TCP_Server_Info *server;
1889        struct page *page;
1890        int rc = 0;
1891
1892        /*
1893         * If wsize is smaller than the page cache size, default to writing
1894         * one page at a time via cifs_writepage
1895         */
1896        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1897                return generic_writepages(mapping, wbc);
1898
1899        if (wbc->range_cyclic) {
1900                index = mapping->writeback_index; /* Start from prev offset */
1901                end = -1;
1902        } else {
1903                index = wbc->range_start >> PAGE_CACHE_SHIFT;
1904                end = wbc->range_end >> PAGE_CACHE_SHIFT;
1905                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1906                        range_whole = true;
1907                scanned = true;
1908        }
1909retry:
1910        while (!done && index <= end) {
1911                unsigned int i, nr_pages, found_pages;
1912                pgoff_t next = 0, tofind;
1913                struct page **pages;
1914
1915                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1916                                end - index) + 1;
1917
1918                wdata = cifs_writedata_alloc((unsigned int)tofind,
1919                                             cifs_writev_complete);
1920                if (!wdata) {
1921                        rc = -ENOMEM;
1922                        break;
1923                }
1924
1925                /*
1926                 * find_get_pages_tag seems to return a max of 256 on each
1927                 * iteration, so we must call it several times in order to
1928                 * fill the array or the wsize is effectively limited to
1929                 * 256 * PAGE_CACHE_SIZE.
1930                 */
1931                found_pages = 0;
1932                pages = wdata->pages;
1933                do {
1934                        nr_pages = find_get_pages_tag(mapping, &index,
1935                                                        PAGECACHE_TAG_DIRTY,
1936                                                        tofind, pages);
1937                        found_pages += nr_pages;
1938                        tofind -= nr_pages;
1939                        pages += nr_pages;
1940                } while (nr_pages && tofind && index <= end);
1941
1942                if (found_pages == 0) {
1943                        kref_put(&wdata->refcount, cifs_writedata_release);
1944                        break;
1945                }
1946
1947                nr_pages = 0;
1948                for (i = 0; i < found_pages; i++) {
1949                        page = wdata->pages[i];
1950                        /*
1951                         * At this point we hold neither mapping->tree_lock nor
1952                         * lock on the page itself: the page may be truncated or
1953                         * invalidated (changing page->mapping to NULL), or even
1954                         * swizzled back from swapper_space to tmpfs file
1955                         * mapping
1956                         */
1957
1958                        if (nr_pages == 0)
1959                                lock_page(page);
1960                        else if (!trylock_page(page))
1961                                break;
1962
1963                        if (unlikely(page->mapping != mapping)) {
1964                                unlock_page(page);
1965                                break;
1966                        }
1967
1968                        if (!wbc->range_cyclic && page->index > end) {
1969                                done = true;
1970                                unlock_page(page);
1971                                break;
1972                        }
1973
1974                        if (next && (page->index != next)) {
1975                                /* Not next consecutive page */
1976                                unlock_page(page);
1977                                break;
1978                        }
1979
1980                        if (wbc->sync_mode != WB_SYNC_NONE)
1981                                wait_on_page_writeback(page);
1982
1983                        if (PageWriteback(page) ||
1984                                        !clear_page_dirty_for_io(page)) {
1985                                unlock_page(page);
1986                                break;
1987                        }
1988
1989                        /*
1990                         * This actually clears the dirty bit in the radix tree.
1991                         * See cifs_writepage() for more commentary.
1992                         */
1993                        set_page_writeback(page);
1994
1995                        if (page_offset(page) >= i_size_read(mapping->host)) {
1996                                done = true;
1997                                unlock_page(page);
1998                                end_page_writeback(page);
1999                                break;
2000                        }
2001
2002                        wdata->pages[i] = page;
2003                        next = page->index + 1;
2004                        ++nr_pages;
2005                }
2006
2007                /* reset index to refind any pages skipped */
2008                if (nr_pages == 0)
2009                        index = wdata->pages[0]->index + 1;
2010
2011                /* put any pages we aren't going to use */
2012                for (i = nr_pages; i < found_pages; i++) {
2013                        page_cache_release(wdata->pages[i]);
2014                        wdata->pages[i] = NULL;
2015                }
2016
2017                /* nothing to write? */
2018                if (nr_pages == 0) {
2019                        kref_put(&wdata->refcount, cifs_writedata_release);
2020                        continue;
2021                }
2022
2023                wdata->sync_mode = wbc->sync_mode;
2024                wdata->nr_pages = nr_pages;
2025                wdata->offset = page_offset(wdata->pages[0]);
2026                wdata->pagesz = PAGE_CACHE_SIZE;
2027                wdata->tailsz =
2028                        min(i_size_read(mapping->host) -
2029                            page_offset(wdata->pages[nr_pages - 1]),
2030                            (loff_t)PAGE_CACHE_SIZE);
2031                wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2032                                        wdata->tailsz;
2033
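                    /*
                     * Send the run, refreshing the writable handle and
                     * retrying on -EAGAIN when doing data-integrity
                     * (WB_SYNC_ALL) writeback.
                     */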
2034                do {
2035                        if (wdata->cfile != NULL)
2036                                cifsFileInfo_put(wdata->cfile);
2037                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2038                                                          false);
2039                        if (!wdata->cfile) {
2040                                cifs_dbg(VFS, "No writable handles for inode\n");
2041                                rc = -EBADF;
2042                                break;
2043                        }
2044                        wdata->pid = wdata->cfile->pid;
2045                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2046                        rc = server->ops->async_writev(wdata);
2047                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2048
2049                for (i = 0; i < nr_pages; ++i)
2050                        unlock_page(wdata->pages[i]);
2051
2052                /* send failure -- clean up the mess */
2053                if (rc != 0) {
2054                        for (i = 0; i < nr_pages; ++i) {
2055                                if (rc == -EAGAIN)
2056                                        redirty_page_for_writepage(wbc,
2057                                                           wdata->pages[i]);
2058                                else
2059                                        SetPageError(wdata->pages[i]);
2060                                end_page_writeback(wdata->pages[i]);
2061                                page_cache_release(wdata->pages[i]);
2062                        }
2063                        if (rc != -EAGAIN)
2064                                mapping_set_error(mapping, rc);
2065                }
2066                kref_put(&wdata->refcount, cifs_writedata_release);
2067
2068                wbc->nr_to_write -= nr_pages;
2069                if (wbc->nr_to_write <= 0)
2070                        done = true;
2071
2072                index = next;
2073        }
2074
2075        if (!scanned && !done) {
2076                /*
2077                 * We hit the last page and there is more work to be done: wrap
2078                 * back to the start of the file
2079                 */
2080                scanned = true;
2081                index = 0;
2082                goto retry;
2083        }
2084
2085        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2086                mapping->writeback_index = index;
2087
2088        return rc;
2089}
2090
2091static int
2092cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2093{
2094        int rc;
2095        unsigned int xid;
2096
2097        xid = get_xid();
2098/* BB add check for wbc flags */
2099        page_cache_get(page);
2100        if (!PageUptodate(page))
2101                cifs_dbg(FYI, "ppw - page not up to date\n");
2102
2103        /*
2104         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2105         *
2106         * A writepage() implementation always needs to do either this,
2107         * or re-dirty the page with "redirty_page_for_writepage()" in
2108         * the case of a failure.
2109         *
2110         * Just unlocking the page will cause the radix tree tag-bits
2111         * to fail to update with the state of the page correctly.
2112         */
2113        set_page_writeback(page);
2114retry_write:
2115        rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2116        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2117                goto retry_write;
2118        else if (rc == -EAGAIN)
2119                redirty_page_for_writepage(wbc, page);
2120        else if (rc != 0)
2121                SetPageError(page);
2122        else
2123                SetPageUptodate(page);
2124        end_page_writeback(page);
2125        page_cache_release(page);
2126        free_xid(xid);
2127        return rc;
2128}
2129
2130static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2131{
2132        int rc = cifs_writepage_locked(page, wbc);
2133        unlock_page(page);
2134        return rc;
2135}
2136
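    /*
     * Commit data copied into a page by a buffered write. Pages that are
     * not uptodate are written to the server synchronously via
     * cifs_write(); uptodate pages are just marked dirty.
     */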
2137static int cifs_write_end(struct file *file, struct address_space *mapping,
2138                        loff_t pos, unsigned len, unsigned copied,
2139                        struct page *page, void *fsdata)
2140{
2141        int rc;
2142        struct inode *inode = mapping->host;
2143        struct cifsFileInfo *cfile = file->private_data;
2144        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2145        __u32 pid;
2146
2147        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2148                pid = cfile->pid;
2149        else
2150                pid = current->tgid;
2151
2152        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2153                 page, pos, copied);
2154
2155        if (PageChecked(page)) {
2156                if (copied == len)
2157                        SetPageUptodate(page);
2158                ClearPageChecked(page);
2159        } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2160                SetPageUptodate(page);
2161
2162        if (!PageUptodate(page)) {
2163                char *page_data;
2164                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2165                unsigned int xid;
2166
2167                xid = get_xid();
2168                /* this is probably better than directly calling
2169                   partialpage_write since here the file handle is
2170                   known, which we might as well leverage */
2171                /* BB check if anything else missing out of ppw
2172                   such as updating last write time */
2173                page_data = kmap(page);
2174                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2175                /* if (rc < 0) should we set writebehind rc? */
2176                kunmap(page);
2177
2178                free_xid(xid);
2179        } else {
2180                rc = copied;
2181                pos += copied;
2182                set_page_dirty(page);
2183        }
2184
2185        if (rc > 0) {
2186                spin_lock(&inode->i_lock);
2187                if (pos > inode->i_size)
2188                        i_size_write(inode, pos);
2189                spin_unlock(&inode->i_lock);
2190        }
2191
2192        unlock_page(page);
2193        page_cache_release(page);
2194
2195        return rc;
2196}
2197
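    /*
     * Strict-cache fsync: flush dirty pages, invalidate the page cache if
     * we no longer hold a read oplock, then ask the server to flush the
     * file.
     */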
2198int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2199                      int datasync)
2200{
2201        unsigned int xid;
2202        int rc = 0;
2203        struct cifs_tcon *tcon;
2204        struct TCP_Server_Info *server;
2205        struct cifsFileInfo *smbfile = file->private_data;
2206        struct inode *inode = file_inode(file);
2207        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2208
2209        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2210        if (rc)
2211                return rc;
2212        mutex_lock(&inode->i_mutex);
2213
2214        xid = get_xid();
2215
2216        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2217                 file->f_path.dentry->d_name.name, datasync);
2218
2219        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2220                rc = cifs_invalidate_mapping(inode);
2221                if (rc) {
2222                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2223                        rc = 0; /* don't care about it in fsync */
2224                }
2225        }
2226
2227        tcon = tlink_tcon(smbfile->tlink);
2228        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2229                server = tcon->ses->server;
2230                if (server->ops->flush)
2231                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2232                else
2233                        rc = -ENOSYS;
2234        }
2235
2236        free_xid(xid);
2237        mutex_unlock(&inode->i_mutex);
2238        return rc;
2239}
2240
2241int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2242{
2243        unsigned int xid;
2244        int rc = 0;
2245        struct cifs_tcon *tcon;
2246        struct TCP_Server_Info *server;
2247        struct cifsFileInfo *smbfile = file->private_data;
2248        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2249        struct inode *inode = file->f_mapping->host;
2250
2251        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2252        if (rc)
2253                return rc;
2254        mutex_lock(&inode->i_mutex);
2255
2256        xid = get_xid();
2257
2258        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2259                 file->f_path.dentry->d_name.name, datasync);
2260
2261        tcon = tlink_tcon(smbfile->tlink);
2262        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2263                server = tcon->ses->server;
2264                if (server->ops->flush)
2265                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2266                else
2267                        rc = -ENOSYS;
2268        }
2269
2270        free_xid(xid);
2271        mutex_unlock(&inode->i_mutex);
2272        return rc;
2273}
2274
2275/*
2276 * As file closes, flush all cached write data for this inode checking
2277 * for write behind errors.
2278 */
2279int cifs_flush(struct file *file, fl_owner_t id)
2280{
2281        struct inode *inode = file_inode(file);
2282        int rc = 0;
2283
2284        if (file->f_mode & FMODE_WRITE)
2285                rc = filemap_write_and_wait(inode->i_mapping);
2286
2287        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2288
2289        return rc;
2290}
2291
2292static int
2293cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2294{
2295        int rc = 0;
2296        unsigned long i;
2297
2298        for (i = 0; i < num_pages; i++) {
2299                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2300                if (!pages[i]) {
2301                        /*
2302                         * save number of pages we have already allocated and
2303                         * return with ENOMEM error
2304                         */
2305                        num_pages = i;
2306                        rc = -ENOMEM;
2307                        break;
2308                }
2309        }
2310
2311        if (rc) {
2312                for (i = 0; i < num_pages; i++)
2313                        put_page(pages[i]);
2314        }
2315        return rc;
2316}
2317
2318static inline
2319size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2320{
2321        size_t num_pages;
2322        size_t clen;
2323
2324        clen = min_t(const size_t, len, wsize);
2325        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2326
2327        if (cur_len)
2328                *cur_len = clen;
2329
2330        return num_pages;
2331}
2332
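    /*
     * Completion work for an uncached write: advance the cached EOF and
     * i_size if needed, complete the waiter, then release the data pages
     * unless the write will be retried after -EAGAIN.
     */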
2333static void
2334cifs_uncached_writev_complete(struct work_struct *work)
2335{
2336        int i;
2337        struct cifs_writedata *wdata = container_of(work,
2338                                        struct cifs_writedata, work);
2339        struct inode *inode = wdata->cfile->dentry->d_inode;
2340        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2341
2342        spin_lock(&inode->i_lock);
2343        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2344        if (cifsi->server_eof > inode->i_size)
2345                i_size_write(inode, cifsi->server_eof);
2346        spin_unlock(&inode->i_lock);
2347
2348        complete(&wdata->done);
2349
2350        if (wdata->result != -EAGAIN) {
2351                for (i = 0; i < wdata->nr_pages; i++)
2352                        put_page(wdata->pages[i]);
2353        }
2354
2355        kref_put(&wdata->refcount, cifs_writedata_release);
2356}
2357
2358/* attempt to send write to server, retry on any -EAGAIN errors */
2359static int
2360cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2361{
2362        int rc;
2363        struct TCP_Server_Info *server;
2364
2365        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2366
2367        do {
2368                if (wdata->cfile->invalidHandle) {
2369                        rc = cifs_reopen_file(wdata->cfile, false);
2370                        if (rc != 0)
2371                                continue;
2372                }
2373                rc = server->ops->async_writev(wdata);
2374        } while (rc == -EAGAIN);
2375
2376        return rc;
2377}
2378
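    /*
     * Uncached write: copy the user iovec into freshly allocated pages,
     * issue async writes in wsize-sized chunks, then wait for the replies
     * in order of increasing offset.
     */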
2379static ssize_t
2380cifs_iovec_write(struct file *file, const struct iovec *iov,
2381                 unsigned long nr_segs, loff_t *poffset)
2382{
2383        unsigned long nr_pages, i;
2384        size_t copied, len, cur_len;
2385        ssize_t total_written = 0;
2386        loff_t offset;
2387        struct iov_iter it;
2388        struct cifsFileInfo *open_file;
2389        struct cifs_tcon *tcon;
2390        struct cifs_sb_info *cifs_sb;
2391        struct cifs_writedata *wdata, *tmp;
2392        struct list_head wdata_list;
2393        int rc;
2394        pid_t pid;
2395
2396        len = iov_length(iov, nr_segs);
2397        if (!len)
2398                return 0;
2399
2400        rc = generic_write_checks(file, poffset, &len, 0);
2401        if (rc)
2402                return rc;
2403
2404        INIT_LIST_HEAD(&wdata_list);
2405        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2406        open_file = file->private_data;
2407        tcon = tlink_tcon(open_file->tlink);
2408
2409        if (!tcon->ses->server->ops->async_writev)
2410                return -ENOSYS;
2411
2412        offset = *poffset;
2413
2414        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2415                pid = open_file->pid;
2416        else
2417                pid = current->tgid;
2418
2419        iov_iter_init(&it, iov, nr_segs, len, 0);
2420        do {
2421                size_t save_len;
2422
2423                nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2424                wdata = cifs_writedata_alloc(nr_pages,
2425                                             cifs_uncached_writev_complete);
2426                if (!wdata) {
2427                        rc = -ENOMEM;
2428                        break;
2429                }
2430
2431                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2432                if (rc) {
2433                        kfree(wdata);
2434                        break;
2435                }
2436
2437                save_len = cur_len;
2438                for (i = 0; i < nr_pages; i++) {
2439                        copied = min_t(const size_t, cur_len, PAGE_SIZE);
2440                        copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2441                                                         0, copied);
2442                        cur_len -= copied;
2443                        iov_iter_advance(&it, copied);
2444                }
2445                cur_len = save_len - cur_len;
2446
2447                wdata->sync_mode = WB_SYNC_ALL;
2448                wdata->nr_pages = nr_pages;
2449                wdata->offset = (__u64)offset;
2450                wdata->cfile = cifsFileInfo_get(open_file);
2451                wdata->pid = pid;
2452                wdata->bytes = cur_len;
2453                wdata->pagesz = PAGE_SIZE;
2454                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2455                rc = cifs_uncached_retry_writev(wdata);
2456                if (rc) {
2457                        kref_put(&wdata->refcount, cifs_writedata_release);
2458                        break;
2459                }
2460
2461                list_add_tail(&wdata->list, &wdata_list);
2462                offset += cur_len;
2463                len -= cur_len;
2464        } while (len > 0);
2465
2466        /*
2467         * If at least one write was successfully sent, then discard any rc
2468         * value from the later writes. If the remaining writes succeed, we'll
2469         * end up returning whatever was written. If one fails, we'll get a
2470         * new rc value from it.
2471         */
2472        if (!list_empty(&wdata_list))
2473                rc = 0;
2474
2475        /*
2476         * Wait for and collect replies for any successful sends in order of
2477         * increasing offset. Once an error is hit or we get a fatal signal
2478         * while waiting, then return without waiting for any more replies.
2479         */
2480restart_loop:
2481        list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2482                if (!rc) {
2483                        /* FIXME: freezable too? */
2484                        rc = wait_for_completion_killable(&wdata->done);
2485                        if (rc)
2486                                rc = -EINTR;
2487                        else if (wdata->result)
2488                                rc = wdata->result;
2489                        else
2490                                total_written += wdata->bytes;
2491
2492                        /* resend call if it's a retryable error */
2493                        if (rc == -EAGAIN) {
2494                                rc = cifs_uncached_retry_writev(wdata);
2495                                goto restart_loop;
2496                        }
2497                }
2498                list_del_init(&wdata->list);
2499                kref_put(&wdata->refcount, cifs_writedata_release);
2500        }
2501
2502        if (total_written > 0)
2503                *poffset += total_written;
2504
2505        cifs_stats_bytes_written(tcon, total_written);
2506        return total_written ? total_written : (ssize_t)rc;
2507}
2508
2509ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2510                                unsigned long nr_segs, loff_t pos)
2511{
2512        ssize_t written;
2513        struct inode *inode;
2514
2515        inode = file_inode(iocb->ki_filp);
2516
2517        /*
2518         * BB - optimize the case when signing is disabled. We could drop this
2519         * extra memory-to-memory copying and use the iovec buffers directly
2520         * when constructing the write request.
2521         */
2522
2523        written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2524        if (written > 0) {
2525                CIFS_I(inode)->invalid_mapping = true;
2526                iocb->ki_pos = pos;
2527        }
2528
2529        return written;
2530}
2531
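    /*
     * Cached write for strict mode: proceed through the generic write
     * path only if no cached byte-range lock conflicts with the request.
     */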
2532static ssize_t
2533cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2534            unsigned long nr_segs, loff_t pos)
2535{
2536        struct file *file = iocb->ki_filp;
2537        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2538        struct inode *inode = file->f_mapping->host;
2539        struct cifsInodeInfo *cinode = CIFS_I(inode);
2540        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2541        ssize_t rc = -EACCES;
2542
2543        BUG_ON(iocb->ki_pos != pos);
2544
2545        /*
2546         * We need to hold the sem to be sure nobody modifies the lock list
2547         * with a brlock that prevents writing.
2548         */
2549        down_read(&cinode->lock_sem);
2550        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2551                                     server->vals->exclusive_lock_type, NULL,
2552                                     CIFS_WRITE_OP)) {
2553                mutex_lock(&inode->i_mutex);
2554                rc = __generic_file_aio_write(iocb, iov, nr_segs,
2555                                               &iocb->ki_pos);
2556                mutex_unlock(&inode->i_mutex);
2557        }
2558
2559        if (rc > 0) {
2560                ssize_t err;
2561
2562                err = generic_write_sync(file, pos, rc);
2563                if (err < 0 && rc > 0)
2564                        rc = err;
2565        }
2566
2567        up_read(&cinode->lock_sem);
2568        return rc;
2569}
2570
2571ssize_t
2572cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2573                   unsigned long nr_segs, loff_t pos)
2574{
2575        struct inode *inode = file_inode(iocb->ki_filp);
2576        struct cifsInodeInfo *cinode = CIFS_I(inode);
2577        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2578        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2579                                                iocb->ki_filp->private_data;
2580        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2581        ssize_t written;
2582
2583        if (CIFS_CACHE_WRITE(cinode)) {
2584                if (cap_unix(tcon->ses) &&
2585                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2586                    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2587                        return generic_file_aio_write(iocb, iov, nr_segs, pos);
2588                return cifs_writev(iocb, iov, nr_segs, pos);
2589        }
2590        /*
2591         * For non-oplocked files in strict cache mode we need to write the data
2592         * to the server exactly from pos to pos+len-1 rather than flush all
2593         * affected pages, because flushing may cause an error with mandatory
2594         * locks on these pages but not on the region from pos to pos+len-1.
2595         */
2596        written = cifs_user_writev(iocb, iov, nr_segs, pos);
2597        if (written > 0 && CIFS_CACHE_READ(cinode)) {
2598                /*
2599                 * Windows 7 server can delay breaking level2 oplock if a write
2600                 * request comes - break it on the client to prevent reading
2601                 * stale data.
2602                 */
2603                cifs_invalidate_mapping(inode);
2604                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2605                         inode);
2606                cinode->oplock = 0;
2607        }
2608        return written;
2609}
2610
2611static struct cifs_readdata *
2612cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2613{
2614        struct cifs_readdata *rdata;
2615
2616        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2617                        GFP_KERNEL);
2618        if (rdata != NULL) {
2619                kref_init(&rdata->refcount);
2620                INIT_LIST_HEAD(&rdata->list);
2621                init_completion(&rdata->done);
2622                INIT_WORK(&rdata->work, complete);
2623        }
2624
2625        return rdata;
2626}
2627
2628void
2629cifs_readdata_release(struct kref *refcount)
2630{
2631        struct cifs_readdata *rdata = container_of(refcount,
2632                                        struct cifs_readdata, refcount);
2633
2634        if (rdata->cfile)
2635                cifsFileInfo_put(rdata->cfile);
2636
2637        kfree(rdata);
2638}
2639
2640static int
2641cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2642{
2643        int rc = 0;
2644        struct page *page;
2645        unsigned int i;
2646
2647        for (i = 0; i < nr_pages; i++) {
2648                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2649                if (!page) {
2650                        rc = -ENOMEM;
2651                        break;
2652                }
2653                rdata->pages[i] = page;
2654        }
2655
2656        if (rc) {
2657                for (i = 0; i < nr_pages; i++) {
2658                        put_page(rdata->pages[i]);
2659                        rdata->pages[i] = NULL;
2660                }
2661        }
2662        return rc;
2663}
2664
2665static void
2666cifs_uncached_readdata_release(struct kref *refcount)
2667{
2668        struct cifs_readdata *rdata = container_of(refcount,
2669                                        struct cifs_readdata, refcount);
2670        unsigned int i;
2671
2672        for (i = 0; i < rdata->nr_pages; i++) {
2673                put_page(rdata->pages[i]);
2674                rdata->pages[i] = NULL;
2675        }
2676        cifs_readdata_release(refcount);
2677}
2678
2679static int
2680cifs_retry_async_readv(struct cifs_readdata *rdata)
2681{
2682        int rc;
2683        struct TCP_Server_Info *server;
2684
2685        server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2686
2687        do {
2688                if (rdata->cfile->invalidHandle) {
2689                        rc = cifs_reopen_file(rdata->cfile, true);
2690                        if (rc != 0)
2691                                continue;
2692                }
2693                rc = server->ops->async_readv(rdata);
2694        } while (rc == -EAGAIN);
2695
2696        return rc;
2697}
2698
2699/**
2700 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2701 * @rdata:      the readdata response with list of pages holding data
2702 * @iov:        vector in which we should copy the data
2703 * @nr_segs:    number of segments in vector
2704 * @offset:     offset into file of the first iovec
2705 * @copied:     used to return the amount of data copied to the iov
2706 *
2707 * This function copies data from a list of pages in a readdata response into
2708 * an array of iovecs. It will first calculate where the data should go
2709 * based on the info in the readdata and then copy the data into that spot.
2710 */
2711static ssize_t
2712cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2713                        unsigned long nr_segs, loff_t offset, ssize_t *copied)
2714{
2715        int rc = 0;
2716        struct iov_iter ii;
2717        size_t pos = rdata->offset - offset;
2718        ssize_t remaining = rdata->bytes;
2719        unsigned char *pdata;
2720        unsigned int i;
2721
2722        /* set up iov_iter and advance to the correct offset */
2723        iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2724        iov_iter_advance(&ii, pos);
2725
2726        *copied = 0;
2727        for (i = 0; i < rdata->nr_pages; i++) {
2728                ssize_t copy;
2729                struct page *page = rdata->pages[i];
2730
2731                /* copy a whole page or whatever's left */
2732                copy = min_t(ssize_t, remaining, PAGE_SIZE);
2733
2734                /* ...but limit it to whatever space is left in the iov */
2735                copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2736
2737                /* go while there's data to be copied and no errors */
2738                if (copy && !rc) {
2739                        pdata = kmap(page);
2740                        rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2741                                                (int)copy);
2742                        kunmap(page);
2743                        if (!rc) {
2744                                *copied += copy;
2745                                remaining -= copy;
2746                                iov_iter_advance(&ii, copy);
2747                        }
2748                }
2749        }
2750
2751        return rc;
2752}
2753
2754static void
2755cifs_uncached_readv_complete(struct work_struct *work)
2756{
2757        struct cifs_readdata *rdata = container_of(work,
2758                                                struct cifs_readdata, work);
2759
2760        complete(&rdata->done);
2761        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2762}
2763
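    /*
     * Receive the payload of a read response directly into the pages of
     * the readdata, zero-filling the tail of a partial page and releasing
     * any pages beyond the response length.
     */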
2764static int
2765cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2766                        struct cifs_readdata *rdata, unsigned int len)
2767{
2768        int total_read = 0, result = 0;
2769        unsigned int i;
2770        unsigned int nr_pages = rdata->nr_pages;
2771        struct kvec iov;
2772
2773        rdata->tailsz = PAGE_SIZE;
2774        for (i = 0; i < nr_pages; i++) {
2775                struct page *page = rdata->pages[i];
2776
2777                if (len >= PAGE_SIZE) {
2778                        /* enough data to fill the page */
2779                        iov.iov_base = kmap(page);
2780                        iov.iov_len = PAGE_SIZE;
2781                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2782                                 i, iov.iov_base, iov.iov_len);
2783                        len -= PAGE_SIZE;
2784                } else if (len > 0) {
2785                        /* enough for partial page, fill and zero the rest */
2786                        iov.iov_base = kmap(page);
2787                        iov.iov_len = len;
2788                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2789                                 i, iov.iov_base, iov.iov_len);
2790                        memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2791                        rdata->tailsz = len;
2792                        len = 0;
2793                } else {
2794                        /* no need to hold page hostage */
2795                        rdata->pages[i] = NULL;
2796                        rdata->nr_pages--;
2797                        put_page(page);
2798                        continue;
2799                }
2800
2801                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2802                kunmap(page);
2803                if (result < 0)
2804                        break;
2805
2806                total_read += result;
2807        }
2808
2809        return total_read > 0 ? total_read : result;
2810}
2811
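    /*
     * Uncached read: issue async reads in rsize-sized chunks, wait for
     * the replies in order of increasing offset and copy the data into
     * the user iovec.
     */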
2812static ssize_t
2813cifs_iovec_read(struct file *file, const struct iovec *iov,
2814                 unsigned long nr_segs, loff_t *poffset)
2815{
2816        ssize_t rc;
2817        size_t len, cur_len;
2818        ssize_t total_read = 0;
2819        loff_t offset = *poffset;
2820        unsigned int npages;
2821        struct cifs_sb_info *cifs_sb;
2822        struct cifs_tcon *tcon;
2823        struct cifsFileInfo *open_file;
2824        struct cifs_readdata *rdata, *tmp;
2825        struct list_head rdata_list;
2826        pid_t pid;
2827
2828        if (!nr_segs)
2829                return 0;
2830
2831        len = iov_length(iov, nr_segs);
2832        if (!len)
2833                return 0;
2834
2835        INIT_LIST_HEAD(&rdata_list);
2836        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2837        open_file = file->private_data;
2838        tcon = tlink_tcon(open_file->tlink);
2839
2840        if (!tcon->ses->server->ops->async_readv)
2841                return -ENOSYS;
2842
2843        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2844                pid = open_file->pid;
2845        else
2846                pid = current->tgid;
2847
2848        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2849                cifs_dbg(FYI, "attempting read on write only file instance\n");
2850
2851        do {
2852                cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2853                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2854
2855                /* allocate a readdata struct */
2856                rdata = cifs_readdata_alloc(npages,
2857                                            cifs_uncached_readv_complete);
2858                if (!rdata) {
2859                        rc = -ENOMEM;
2860                        break;
2861                }
2862
2863                rc = cifs_read_allocate_pages(rdata, npages);
2864                if (rc)
2865                        goto error;
2866
2867                rdata->cfile = cifsFileInfo_get(open_file);
2868                rdata->nr_pages = npages;
2869                rdata->offset = offset;
2870                rdata->bytes = cur_len;
2871                rdata->pid = pid;
2872                rdata->pagesz = PAGE_SIZE;
2873                rdata->read_into_pages = cifs_uncached_read_into_pages;
2874
2875                rc = cifs_retry_async_readv(rdata);
2876error:
2877                if (rc) {
2878                        kref_put(&rdata->refcount,
2879                                 cifs_uncached_readdata_release);
2880                        break;
2881                }
2882
2883                list_add_tail(&rdata->list, &rdata_list);
2884                offset += cur_len;
2885                len -= cur_len;
2886        } while (len > 0);
2887
2888        /* if at least one read request was sent successfully, reset rc */
2889        if (!list_empty(&rdata_list))
2890                rc = 0;
2891
2892        /* the loop below should proceed in the order of increasing offsets */
2893restart_loop:
2894        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2895                if (!rc) {
2896                        ssize_t copied;
2897
2898                        /* FIXME: freezable sleep too? */
2899                        rc = wait_for_completion_killable(&rdata->done);
2900                        if (rc)
2901                                rc = -EINTR;
2902                        else if (rdata->result)
2903                                rc = rdata->result;
2904                        else {
2905                                rc = cifs_readdata_to_iov(rdata, iov,
2906                                                        nr_segs, *poffset,
2907                                                        &copied);
2908                                total_read += copied;
2909                        }
2910
2911                        /* resend call if it's a retryable error */
2912                        if (rc == -EAGAIN) {
2913                                rc = cifs_retry_async_readv(rdata);
2914                                goto restart_loop;
2915                        }
2916                }
2917                list_del_init(&rdata->list);
2918                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2919        }
2920
2921        cifs_stats_bytes_read(tcon, total_read);
2922        *poffset += total_read;
2923
2924        /* mask nodata case */
2925        if (rc == -ENODATA)
2926                rc = 0;
2927
2928        return total_read ? total_read : rc;
2929}
2930
2931ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2932                               unsigned long nr_segs, loff_t pos)
2933{
2934        ssize_t read;
2935
2936        read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2937        if (read > 0)
2938                iocb->ki_pos = pos;
2939
2940        return read;
2941}
2942
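    /*
     * Strict cache read: go to the server unless we hold a read oplock,
     * and check for conflicting mandatory byte-range locks before
     * reading through the page cache.
     */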
2943ssize_t
2944cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2945                  unsigned long nr_segs, loff_t pos)
2946{
2947        struct inode *inode = file_inode(iocb->ki_filp);
2948        struct cifsInodeInfo *cinode = CIFS_I(inode);
2949        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2950        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2951                                                iocb->ki_filp->private_data;
2952        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2953        int rc = -EACCES;
2954
2955        /*
2956         * In strict cache mode we need to read from the server every time
2957         * if we don't have at least a level II oplock, because the server
2958         * can delay mtime changes and so we cannot decide whether to
2959         * invalidate the inode. Reading pages can also fail if there are
2960         * mandatory locks on pages affected by this read but not on the
2961         * region from pos to pos+len-1.
2962         */
2963        if (!CIFS_CACHE_READ(cinode))
2964                return cifs_user_readv(iocb, iov, nr_segs, pos);
2965
2966        if (cap_unix(tcon->ses) &&
2967            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2968            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2969                return generic_file_aio_read(iocb, iov, nr_segs, pos);
2970
2971        /*
2972         * We need to hold the sem to be sure nobody modifies lock list
2973         * with a brlock that prevents reading.
2974         */
2975        down_read(&cinode->lock_sem);
2976        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2977                                     tcon->ses->server->vals->shared_lock_type,
2978                                     NULL, CIFS_READ_OP))
2979                rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2980        up_read(&cinode->lock_sem);
2981        return rc;
2982}
2983
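    /*
     * Synchronous read helper (used by cifs_readpage_worker): issue
     * reads of up to rsize bytes at a time through the server's
     * sync_read operation, reopening the file handle and retrying if
     * it has been invalidated by a reconnect.
     */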
2984static ssize_t
2985cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2986{
2987        int rc = -EACCES;
2988        unsigned int bytes_read = 0;
2989        unsigned int total_read;
2990        unsigned int current_read_size;
2991        unsigned int rsize;
2992        struct cifs_sb_info *cifs_sb;
2993        struct cifs_tcon *tcon;
2994        struct TCP_Server_Info *server;
2995        unsigned int xid;
2996        char *cur_offset;
2997        struct cifsFileInfo *open_file;
2998        struct cifs_io_parms io_parms;
2999        int buf_type = CIFS_NO_BUFFER;
3000        __u32 pid;
3001
3002        xid = get_xid();
3003        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3004
3005        /* FIXME: set up handlers for larger reads and/or convert to async */
3006        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3007
3008        if (file->private_data == NULL) {
3009                rc = -EBADF;
3010                free_xid(xid);
3011                return rc;
3012        }
3013        open_file = file->private_data;
3014        tcon = tlink_tcon(open_file->tlink);
3015        server = tcon->ses->server;
3016
3017        if (!server->ops->sync_read) {
3018                free_xid(xid);
3019                return -ENOSYS;
3020        }
3021
3022        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3023                pid = open_file->pid;
3024        else
3025                pid = current->tgid;
3026
3027        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3028                cifs_dbg(FYI, "attempting read on write only file instance\n");
3029
3030        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3031             total_read += bytes_read, cur_offset += bytes_read) {
3032                current_read_size = min_t(uint, read_size - total_read, rsize);
3033                /*
3034                 * For Windows ME and 9x do not request more than the
3035                 * negotiated size or the server will refuse the read.
3036                 */
3037                if ((tcon->ses) && !(tcon->ses->capabilities &
3038                                tcon->ses->server->vals->cap_large_files)) {
3039                        current_read_size = min_t(uint, current_read_size,
3040                                        CIFSMaxBufSize);
3041                }
3042                rc = -EAGAIN;
3043                while (rc == -EAGAIN) {
3044                        if (open_file->invalidHandle) {
3045                                rc = cifs_reopen_file(open_file, true);
3046                                if (rc != 0)
3047                                        break;
3048                        }
3049                        io_parms.pid = pid;
3050                        io_parms.tcon = tcon;
3051                        io_parms.offset = *offset;
3052                        io_parms.length = current_read_size;
3053                        rc = server->ops->sync_read(xid, open_file, &io_parms,
3054                                                    &bytes_read, &cur_offset,
3055                                                    &buf_type);
3056                }
3057                if (rc || (bytes_read == 0)) {
3058                        if (total_read) {
3059                                break;
3060                        } else {
3061                                free_xid(xid);
3062                                return rc;
3063                        }
3064                } else {
3065                        cifs_stats_bytes_read(tcon, bytes_read);
3066                        *offset += bytes_read;
3067                }
3068        }
3069        free_xid(xid);
3070        return total_read;
3071}
3072
3073/*
3074 * If the page is mmap'ed into a process' page tables, then we need to make
3075 * sure that it doesn't change while being written back.
3076 */
3077static int
3078cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3079{
3080        struct page *page = vmf->page;
3081
3082        lock_page(page);
3083        return VM_FAULT_LOCKED;
3084}
3085
3086static struct vm_operations_struct cifs_file_vm_ops = {
3087        .fault = filemap_fault,
3088        .page_mkwrite = cifs_page_mkwrite,
3089        .remap_pages = generic_file_remap_pages,
3090};
3091
3092int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3093{
3094        int xid, rc = 0;
3095        struct inode *inode = file_inode(file);
3096
3097        xid = get_xid();
3098
3099        /* invalidate stale cached data unless we hold a read oplock */
3100        if (!CIFS_CACHE_READ(CIFS_I(inode)))
3101                rc = cifs_invalidate_mapping(inode);
3102
3103        if (!rc)
3104                rc = generic_file_mmap(file, vma);
3105        if (!rc)
3106                vma->vm_ops = &cifs_file_vm_ops;
3107
3108        free_xid(xid);
3109        return rc;
3110}
3111
3112int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3113{
3114        int rc, xid;
3115
3116        xid = get_xid();
3117        rc = cifs_revalidate_file(file);
3118        if (rc) {
3119                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3120                         rc);
3121                free_xid(xid);
3122                return rc;
3123        }
3124        rc = generic_file_mmap(file, vma);
3125        if (rc == 0)
3126                vma->vm_ops = &cifs_file_vm_ops;
3127        free_xid(xid);
3128        return rc;
3129}
3130
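    /*
     * Completion work for a readpages request: put each page back on
     * the LRU, mark it uptodate and copy it to fscache if the read
     * succeeded, then unlock it and drop the remaining references.
     */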
3131static void
3132cifs_readv_complete(struct work_struct *work)
3133{
3134        unsigned int i;
3135        struct cifs_readdata *rdata = container_of(work,
3136                                                struct cifs_readdata, work);
3137
3138        for (i = 0; i < rdata->nr_pages; i++) {
3139                struct page *page = rdata->pages[i];
3140
3141                lru_cache_add_file(page);
3142
3143                if (rdata->result == 0) {
3144                        flush_dcache_page(page);
3145                        SetPageUptodate(page);
3146                }
3147
3148                unlock_page(page);
3149
3150                if (rdata->result == 0)
3151                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3152
3153                page_cache_release(page);
3154                rdata->pages[i] = NULL;
3155        }
3156        kref_put(&rdata->refcount, cifs_readdata_release);
3157}
3158
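    /*
     * Like cifs_uncached_read_into_pages(), but for readahead: pages
     * past the received data are zero-filled and marked uptodate if
     * they lie beyond the server's (probable) EOF, and simply released
     * otherwise.
     */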
3159static int
3160cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3161                        struct cifs_readdata *rdata, unsigned int len)
3162{
3163        int total_read = 0, result = 0;
3164        unsigned int i;
3165        u64 eof;
3166        pgoff_t eof_index;
3167        unsigned int nr_pages = rdata->nr_pages;
3168        struct kvec iov;
3169
3170        /* determine the eof that the server (probably) has */
3171        eof = CIFS_I(rdata->mapping->host)->server_eof;
3172        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3173        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3174
3175        rdata->tailsz = PAGE_CACHE_SIZE;
3176        for (i = 0; i < nr_pages; i++) {
3177                struct page *page = rdata->pages[i];
3178
3179                if (len >= PAGE_CACHE_SIZE) {
3180                        /* enough data to fill the page */
3181                        iov.iov_base = kmap(page);
3182                        iov.iov_len = PAGE_CACHE_SIZE;
3183                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3184                                 i, page->index, iov.iov_base, iov.iov_len);
3185                        len -= PAGE_CACHE_SIZE;
3186                } else if (len > 0) {
3187                        /* enough for partial page, fill and zero the rest */
3188                        iov.iov_base = kmap(page);
3189                        iov.iov_len = len;
3190                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3191                                 i, page->index, iov.iov_base, iov.iov_len);
3192                        memset(iov.iov_base + len,
3193                                '\0', PAGE_CACHE_SIZE - len);
3194                        rdata->tailsz = len;
3195                        len = 0;
3196                } else if (page->index > eof_index) {
3197                        /*
3198                         * The VFS will not try to do readahead past the
3199                         * i_size, but it's possible that we have outstanding
3200                         * writes with gaps in the middle and the i_size hasn't
3201                         * caught up yet. Populate those with zeroed out pages
3202                         * to prevent the VFS from repeatedly attempting to
3203                         * fill them until the writes are flushed.
3204                         */
3205                        zero_user(page, 0, PAGE_CACHE_SIZE);
3206                        lru_cache_add_file(page);
3207                        flush_dcache_page(page);
3208                        SetPageUptodate(page);
3209                        unlock_page(page);
3210                        page_cache_release(page);
3211                        rdata->pages[i] = NULL;
3212                        rdata->nr_pages--;
3213                        continue;
3214                } else {
3215                        /* no need to hold page hostage */
3216                        lru_cache_add_file(page);
3217                        unlock_page(page);
3218                        page_cache_release(page);
3219                        rdata->pages[i] = NULL;
3220                        rdata->nr_pages--;
3221                        continue;
3222                }
3223
3224                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3225                kunmap(page);
3226                if (result < 0)
3227                        break;
3228
3229                total_read += result;
3230        }
3231
3232        return total_read > 0 ? total_read : result;
3233}
3234
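    /*
     * The ->readpages() address_space operation: try fscache first,
     * then batch runs of contiguous pages into rsize-sized async read
     * requests.
     */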
3235static int cifs_readpages(struct file *file, struct address_space *mapping,
3236        struct list_head *page_list, unsigned num_pages)
3237{
3238        int rc;
3239        struct list_head tmplist;
3240        struct cifsFileInfo *open_file = file->private_data;
3241        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3242        unsigned int rsize = cifs_sb->rsize;
3243        pid_t pid;
3244
3245        /*
3246         * Give up immediately if rsize is too small to read an entire page.
3247         * The VFS will fall back to readpage. We should never reach this
3248         * point however since we set ra_pages to 0 when the rsize is smaller
3249         * than a cache page.
3250         */
3251        if (unlikely(rsize < PAGE_CACHE_SIZE))
3252                return 0;
3253
3254        /*
3255         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3256         * immediately if the cookie is negative.
3257         *
3258         * After this point, every page in the list might have PG_fscache set,
3259         * so we will need to clean that up off of every page we don't use.
3260         */
3261        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3262                                         &num_pages);
3263        if (rc == 0)
3264                return rc;
3265
3266        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3267                pid = open_file->pid;
3268        else
3269                pid = current->tgid;
3270
3271        rc = 0;
3272        INIT_LIST_HEAD(&tmplist);
3273
3274        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3275                 __func__, file, mapping, num_pages);
3276
3277        /*
3278         * Start with the page at end of list and move it to private
3279         * list. Do the same with any following pages until we hit
3280         * the rsize limit, hit an index discontinuity, or run out of
3281         * pages. Issue the async read and then start the loop again
3282         * until the list is empty.
3283         *
3284         * Note that list order is important. The page_list is in
3285         * the order of declining indexes, but we want the pages in
3286         * rdata->pages to be in increasing order.
3287         */
3288        while (!list_empty(page_list)) {
3289                unsigned int i;
3290                unsigned int bytes = PAGE_CACHE_SIZE;
3291                unsigned int expected_index;
3292                unsigned int nr_pages = 1;
3293                loff_t offset;
3294                struct page *page, *tpage;
3295                struct cifs_readdata *rdata;
3296
3297                page = list_entry(page_list->prev, struct page, lru);
3298
3299                /*
3300                 * Lock the page and put it in the cache. Since no one else
3301                 * should have access to this page, we're safe to simply set
3302                 * PG_locked without checking it first.
3303                 */
3304                __set_page_locked(page);
3305                rc = add_to_page_cache_locked(page, mapping,
3306                                              page->index, GFP_KERNEL);
3307
3308                /* give up if we can't stick it in the cache */
3309                if (rc) {
3310                        __clear_page_locked(page);
3311                        break;
3312                }
3313
3314                /* move first page to the tmplist */
3315                offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3316                list_move_tail(&page->lru, &tmplist);
3317
3318                /* now try and add more pages onto the request */
3319                expected_index = page->index + 1;
3320                list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3321                        /* discontinuity ? */
3322                        if (page->index != expected_index)
3323                                break;
3324
3325                        /* would this page push the read over the rsize? */
3326                        if (bytes + PAGE_CACHE_SIZE > rsize)
3327                                break;
3328
3329                        __set_page_locked(page);
3330                        if (add_to_page_cache_locked(page, mapping,
3331                                                page->index, GFP_KERNEL)) {
3332                                __clear_page_locked(page);
3333                                break;
3334                        }
3335                        list_move_tail(&page->lru, &tmplist);
3336                        bytes += PAGE_CACHE_SIZE;
3337                        expected_index++;
3338                        nr_pages++;
3339                }
3340
3341                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3342                if (!rdata) {
3343                        /* best to give up if we're out of mem */
3344                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3345                                list_del(&page->lru);
3346                                lru_cache_add_file(page);
3347                                unlock_page(page);
3348                                page_cache_release(page);
3349                        }
3350                        rc = -ENOMEM;
3351                        break;
3352                }
3353
3354                rdata->cfile = cifsFileInfo_get(open_file);
3355                rdata->mapping = mapping;
3356                rdata->offset = offset;
3357                rdata->bytes = bytes;
3358                rdata->pid = pid;
3359                rdata->pagesz = PAGE_CACHE_SIZE;
3360                rdata->read_into_pages = cifs_readpages_read_into_pages;
3361
3362                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3363                        list_del(&page->lru);
3364                        rdata->pages[rdata->nr_pages++] = page;
3365                }
3366
3367                rc = cifs_retry_async_readv(rdata);
3368                if (rc != 0) {
3369                        for (i = 0; i < rdata->nr_pages; i++) {
3370                                page = rdata->pages[i];
3371                                lru_cache_add_file(page);
3372                                unlock_page(page);
3373                                page_cache_release(page);
3374                        }
3375                        kref_put(&rdata->refcount, cifs_readdata_release);
3376                        break;
3377                }
3378
3379                kref_put(&rdata->refcount, cifs_readdata_release);
3380        }
3381
3382        /* Any pages that have been shown to fscache but didn't get added to
3383         * the pagecache must be uncached before they get returned to the
3384         * allocator.
3385         */
3386        cifs_fscache_readpages_cancel(mapping->host, page_list);
3387        return rc;
3388}
3389
3390/*
3391 * cifs_readpage_worker must be called with the page pinned
3392 */
3393static int cifs_readpage_worker(struct file *file, struct page *page,
3394        loff_t *poffset)
3395{
3396        char *read_data;
3397        int rc;
3398
3399        /* Is the page cached? */
3400        rc = cifs_readpage_from_fscache(file_inode(file), page);
3401        if (rc == 0)
3402                goto read_complete;
3403
3404        read_data = kmap(page);
3405        /* for reads over a certain size we could initiate async read ahead */
3406
3407        rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3408
3409        if (rc < 0)
3410                goto io_error;
3411        else
3412                cifs_dbg(FYI, "Bytes read %d\n", rc);
3413
3414        file_inode(file)->i_atime =
3415                current_fs_time(file_inode(file)->i_sb);
3416
3417        if (PAGE_CACHE_SIZE > rc)
3418                memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3419
3420        flush_dcache_page(page);
3421        SetPageUptodate(page);
3422
3423        /* send this page to the cache */
3424        cifs_readpage_to_fscache(file_inode(file), page);
3425
3426        rc = 0;
3427
3428io_error:
3429        kunmap(page);
3430        unlock_page(page);
3431
3432read_complete:
3433        return rc;
3434}
3435
3436static int cifs_readpage(struct file *file, struct page *page)
3437{
3438        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3439        int rc = -EACCES;
3440        unsigned int xid;
3441
3442        xid = get_xid();
3443
3444        if (file->private_data == NULL) {
3445                rc = -EBADF;
3446                free_xid(xid);
3447                return rc;
3448        }
3449
3450        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3451                 page, (int)offset, (int)offset);
3452
3453        rc = cifs_readpage_worker(file, page, &offset);
3454
3455        free_xid(xid);
3456        return rc;
3457}
3458
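    /*
     * Return 1 if at least one open handle on the inode has write
     * access; the open file list is scanned under cifs_file_list_lock.
     */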
3459static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3460{
3461        struct cifsFileInfo *open_file;
3462
3463        spin_lock(&cifs_file_list_lock);
3464        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3465                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3466                        spin_unlock(&cifs_file_list_lock);
3467                        return 1;
3468                }
3469        }
3470        spin_unlock(&cifs_file_list_lock);
3471        return 0;
3472}
3473
3474/* We do not want to update the file size from the server for inodes
3475   open for write, to avoid races with writepage extending the file.
3476   In the future we could consider refreshing the inode only on
3477   increases in the file size, but this is tricky to do without
3478   racing with writebehind page caching in the current Linux kernel
3479   design. */
3480bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3481{
3482        if (!cifsInode)
3483                return true;
3484
3485        if (is_inode_writable(cifsInode)) {
3486                /* This inode is open for write at least once */
3487                struct cifs_sb_info *cifs_sb;
3488
3489                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3490                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3491                        /* since there is no page cache to corrupt on
3492                           directio we can change the size safely */
3493                        return true;
3494                }
3495
3496                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3497                        return true;
3498
3499                return false;
3500        } else
3501                return true;
3502}
3503
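    /*
     * Prepare a page for a write. Where possible, avoid reading the
     * page in from the server first: full-page writes, uptodate pages,
     * and, under a read oplock, writes to pages lying beyond the EOF
     * (or covering all of the page's existing data) need no read.
     */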
3504static int cifs_write_begin(struct file *file, struct address_space *mapping,
3505                        loff_t pos, unsigned len, unsigned flags,
3506                        struct page **pagep, void **fsdata)
3507{
3508        int oncethru = 0;
3509        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3510        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3511        loff_t page_start = pos & PAGE_MASK;
3512        loff_t i_size;
3513        struct page *page;
3514        int rc = 0;
3515
3516        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3517
3518start:
3519        page = grab_cache_page_write_begin(mapping, index, flags);
3520        if (!page) {
3521                rc = -ENOMEM;
3522                goto out;
3523        }
3524
3525        if (PageUptodate(page))
3526                goto out;
3527
3528        /*
3529         * If we write a full page it will be up to date, no need to read from
3530         * the server. If the write is short, we'll end up doing a sync write
3531         * instead.
3532         */
3533        if (len == PAGE_CACHE_SIZE)
3534                goto out;
3535
3536        /*
3537         * optimize away the read when we have an oplock, and we're not
3538         * expecting to use any of the data we'd be reading in. That
3539         * is, when the page lies beyond the EOF, or straddles the EOF
3540         * and the write will cover all of the existing data.
3541         */
3542        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3543                i_size = i_size_read(mapping->host);
3544                if (page_start >= i_size ||
3545                    (offset == 0 && (pos + len) >= i_size)) {
3546                        zero_user_segments(page, 0, offset,
3547                                           offset + len,
3548                                           PAGE_CACHE_SIZE);
3549                        /*
3550                         * PageChecked means that the parts of the page
3551                         * to which we're not writing are considered up
3552                         * to date. Once the data is copied to the
3553                         * page, it can be set uptodate.
3554                         */
3555                        SetPageChecked(page);
3556                        goto out;
3557                }
3558        }
3559
3560        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3561                /*
3562                 * might as well read a page, it is fast enough. If we get
3563                 * an error, we don't need to return it. cifs_write_end will
3564                 * do a sync write instead since PG_uptodate isn't set.
3565                 */
3566                cifs_readpage_worker(file, page, &page_start);
3567                page_cache_release(page);
3568                oncethru = 1;
3569                goto start;
3570        } else {
3571                /* we could try using another file handle if there is one -
3572                   but how would we lock it to prevent a close of that
3573                   handle racing with this read? In any case the page
3574                   will be written out by write_end, so this is fine */
3575        }
3576out:
3577        *pagep = page;
3578        return rc;
3579}
3580
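    /*
     * Refuse to release a page that still has private data attached;
     * otherwise let fscache decide whether the page can go.
     */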
3581static int cifs_release_page(struct page *page, gfp_t gfp)
3582{
3583        if (PagePrivate(page))
3584                return 0;
3585
3586        return cifs_fscache_release_page(page, gfp);
3587}
3588
3589static void cifs_invalidate_page(struct page *page, unsigned int offset,
3590                                 unsigned int length)
3591{
3592        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3593
3594        if (offset == 0 && length == PAGE_CACHE_SIZE)
3595                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3596}
3597
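    /*
     * Write a dirty page back synchronously before the VM invalidates
     * or migrates it, and drop any copy held by fscache.
     */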
3598static int cifs_launder_page(struct page *page)
3599{
3600        int rc = 0;
3601        loff_t range_start = page_offset(page);
3602        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3603        struct writeback_control wbc = {
3604                .sync_mode = WB_SYNC_ALL,
3605                .nr_to_write = 0,
3606                .range_start = range_start,
3607                .range_end = range_end,
3608        };
3609
3610        cifs_dbg(FYI, "Launder page: %p\n", page);
3611
3612        if (clear_page_dirty_for_io(page))
3613                rc = cifs_writepage_locked(page, &wbc);
3614
3615        cifs_fscache_invalidate_page(page, page->mapping->host);
3616        return rc;
3617}
3618
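    /*
     * Worker to handle an oplock break from the server: flush dirty
     * data (and invalidate the cache if read caching was lost), push
     * cached byte-range locks back to the server, and acknowledge the
     * break unless it was cancelled by a reconnect.
     */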
3619void cifs_oplock_break(struct work_struct *work)
3620{
3621        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3622                                                  oplock_break);
3623        struct inode *inode = cfile->dentry->d_inode;
3624        struct cifsInodeInfo *cinode = CIFS_I(inode);
3625        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3626        int rc = 0;
3627
3628        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3629                                                cifs_has_mand_locks(cinode)) {
3630                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3631                         inode);
3632                cinode->oplock = 0;
3633        }
3634
3635        if (inode && S_ISREG(inode->i_mode)) {
3636                if (CIFS_CACHE_READ(cinode))
3637                        break_lease(inode, O_RDONLY);
3638                else
3639                        break_lease(inode, O_WRONLY);
3640                rc = filemap_fdatawrite(inode->i_mapping);
3641                if (!CIFS_CACHE_READ(cinode)) {
3642                        rc = filemap_fdatawait(inode->i_mapping);
3643                        mapping_set_error(inode->i_mapping, rc);
3644                        cifs_invalidate_mapping(inode);
3645                }
3646                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3647        }
3648
3649        rc = cifs_push_locks(cfile);
3650        if (rc)
3651                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3652
3653        /*
3654         * Releasing a stale oplock after a recent reconnect of the smb
3655         * session, using a now incorrect file handle, is not a data
3656         * integrity issue, but do not bother sending an oplock release if
3657         * the session is still disconnected - the server already dropped it
3658         */
3659        if (!cfile->oplock_break_cancelled) {
3660                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3661                                                             cinode);
3662                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3663        }
3664}
3665
3666const struct address_space_operations cifs_addr_ops = {
3667        .readpage = cifs_readpage,
3668        .readpages = cifs_readpages,
3669        .writepage = cifs_writepage,
3670        .writepages = cifs_writepages,
3671        .write_begin = cifs_write_begin,
3672        .write_end = cifs_write_end,
3673        .set_page_dirty = __set_page_dirty_nobuffers,
3674        .releasepage = cifs_release_page,
3675        .invalidatepage = cifs_invalidate_page,
3676        .launder_page = cifs_launder_page,
3677};
3678
3679/*
3680 * cifs_readpages requires the server to support a buffer large enough to
3681 * contain the header plus one complete page of data.  Otherwise, we need
3682 * to leave cifs_readpages out of the address space operations.
3683 */
3684const struct address_space_operations cifs_addr_ops_smallbuf = {
3685        .readpage = cifs_readpage,
3686        .writepage = cifs_writepage,
3687        .writepages = cifs_writepages,
3688        .write_begin = cifs_write_begin,
3689        .write_end = cifs_write_end,
3690        .set_page_dirty = __set_page_dirty_nobuffers,
3691        .releasepage = cifs_release_page,
3692        .invalidatepage = cifs_invalidate_page,
3693        .launder_page = cifs_launder_page,
3694};
3695