linux/fs/ntfs/attrib.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/**
   3 * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project.
   4 *
   5 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
   6 * Copyright (c) 2002 Richard Russon
   7 */
   8
   9#include <linux/buffer_head.h>
  10#include <linux/sched.h>
  11#include <linux/slab.h>
  12#include <linux/swap.h>
  13#include <linux/writeback.h>
  14
  15#include "attrib.h"
  16#include "debug.h"
  17#include "layout.h"
  18#include "lcnalloc.h"
  19#include "malloc.h"
  20#include "mft.h"
  21#include "ntfs.h"
  22#include "types.h"
  23
  24/**
  25 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
  26 * @ni:         ntfs inode for which to map (part of) a runlist
  27 * @vcn:        map runlist part containing this vcn
  28 * @ctx:        active attribute search context if present or NULL if not
  29 *
  30 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
  31 *
  32 * If @ctx is specified, it is an active search context of @ni and its base mft
  33 * record.  This is needed when ntfs_map_runlist_nolock() encounters unmapped
  34 * runlist fragments and allows their mapping.  If you do not have the mft
  35 * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
  36 * will perform the necessary mapping and unmapping.
  37 *
  38 * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
  39 * restores it before returning.  Thus, @ctx will be left pointing to the same
  40 * attribute on return as on entry.  However, the actual pointers in @ctx may
  41 * point to different memory locations on return, so you must remember to reset
  42 * any cached pointers from the @ctx, i.e. after the call to
  43 * ntfs_map_runlist_nolock(), you will probably want to do:
  44 *      m = ctx->mrec;
  45 *      a = ctx->attr;
  46 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
  47 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
  48 *
  49 * Return 0 on success and -errno on error.  There is one special error code
  50 * which is not an error as such.  This is -ENOENT.  It means that @vcn is out
  51 * of bounds of the runlist.
  52 *
  53 * Note the runlist can be NULL after this function returns if @vcn is zero and
  54 * the attribute has zero allocated size, i.e. there simply is no runlist.
  55 *
  56 * WARNING: If @ctx is supplied, regardless of whether success or failure is
  57 *          returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
  58 *          is no longer valid, i.e. you need to either call
  59 *          ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
  60 *          In that case PTR_ERR(@ctx->mrec) will give you the error code for
  61 *          why the mapping of the old inode failed.
  62 *
  63 * Locking: - The runlist described by @ni must be locked for writing on entry
  64 *            and is locked on return.  Note the runlist will be modified.
  65 *          - If @ctx is NULL, the base mft record of @ni must not be mapped on
  66 *            entry and it will be left unmapped on return.
  67 *          - If @ctx is not NULL, the base mft record must be mapped on entry
  68 *            and it will be left mapped on return.
  69 */
  70int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
  71{
  72        VCN end_vcn;
  73        unsigned long flags;
  74        ntfs_inode *base_ni;
  75        MFT_RECORD *m;
  76        ATTR_RECORD *a;
  77        runlist_element *rl;
  78        struct page *put_this_page = NULL;
  79        int err = 0;
  80        bool ctx_is_temporary, ctx_needs_reset;
  81        ntfs_attr_search_ctx old_ctx = { NULL, };
  82
  83        ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
  84                        (unsigned long long)vcn);
        /*
         * Pick the base inode: @ni itself unless NInoAttr(@ni) is set, in
         * which case it is taken from ni->ext.base_ntfs_ino.
         */
  85        if (!NInoAttr(ni))
  86                base_ni = ni;
  87        else
  88                base_ni = ni->ext.base_ntfs_ino;
  89        if (!ctx) {
                /*
                 * No caller context: map the base mft record and build a
                 * temporary search context.  Both are released again at
                 * err_out.
                 */
  90                ctx_is_temporary = ctx_needs_reset = true;
  91                m = map_mft_record(base_ni);
  92                if (IS_ERR(m))
  93                        return PTR_ERR(m);
  94                ctx = ntfs_attr_get_search_ctx(base_ni, m);
  95                if (unlikely(!ctx)) {
  96                        err = -ENOMEM;
  97                        goto err_out;
  98                }
  99        } else {
 100                VCN allocated_size_vcn;
 101
 102                BUG_ON(IS_ERR(ctx->mrec));
 103                a = ctx->attr;
 104                BUG_ON(!a->non_resident);
 105                ctx_is_temporary = false;
 106                end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
 107                read_lock_irqsave(&ni->size_lock, flags);
 108                allocated_size_vcn = ni->allocated_size >>
 109                                ni->vol->cluster_size_bits;
 110                read_unlock_irqrestore(&ni->size_lock, flags);
                /*
                 * A first extent (lowest_vcn == 0) whose highest_vcn is zero
                 * or negative gets its end taken from the allocated size
                 * instead.
                 */
 111                if (!a->data.non_resident.lowest_vcn && end_vcn <= 0)
 112                        end_vcn = allocated_size_vcn - 1;
 113                /*
 114                 * If we already have the attribute extent containing @vcn in
 115                 * @ctx, no need to look it up again.  We slightly cheat in
 116                 * that if vcn exceeds the allocated size, we will refuse to
 117                 * map the runlist below, so there is definitely no need to get
 118                 * the right attribute extent.
 119                 */
 120                if (vcn >= allocated_size_vcn || (a->type == ni->type &&
 121                                a->name_length == ni->name_len &&
 122                                !memcmp((u8*)a + le16_to_cpu(a->name_offset),
 123                                ni->name, ni->name_len) &&
 124                                sle64_to_cpu(a->data.non_resident.lowest_vcn)
 125                                <= vcn && end_vcn >= vcn))
 126                        ctx_needs_reset = false;
 127                else {
 128                        /* Save the old search context. */
 129                        old_ctx = *ctx;
 130                        /*
 131                         * If the currently mapped (extent) inode is not the
 132                         * base inode we will unmap it when we reinitialize the
 133                         * search context which means we need to get a
 134                         * reference to the page containing the mapped mft
 135                         * record so we do not accidentally drop changes to the
 136                         * mft record when it has not been marked dirty yet.
 137                         */
 138                        if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
 139                                        old_ctx.base_ntfs_ino) {
 140                                put_this_page = old_ctx.ntfs_ino->page;
 141                                get_page(put_this_page);
 142                        }
 143                        /*
 144                         * Reinitialize the search context so we can lookup the
 145                         * needed attribute extent.
 146                         */
 147                        ntfs_attr_reinit_search_ctx(ctx);
 148                        ctx_needs_reset = true;
 149                }
 150        }
 151        if (ctx_needs_reset) {
 152                err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 153                                CASE_SENSITIVE, vcn, NULL, 0, ctx);
 154                if (unlikely(err)) {
                        /* A failed lookup with -ENOENT is reported as -EIO. */
 155                        if (err == -ENOENT)
 156                                err = -EIO;
 157                        goto err_out;
 158                }
 159                BUG_ON(!ctx->attr->non_resident);
 160        }
 161        a = ctx->attr;
 162        /*
 163         * Only decompress the mapping pairs if @vcn is inside it.  Otherwise
 164         * we get into problems when we try to map an out of bounds vcn because
 165         * we then try to map the already mapped runlist fragment and
 166         * ntfs_mapping_pairs_decompress() fails.
 167         */
 168        end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
 169        if (unlikely(vcn && vcn >= end_vcn)) {
 170                err = -ENOENT;
 171                goto err_out;
 172        }
 173        rl = ntfs_mapping_pairs_decompress(ni->vol, a, ni->runlist.rl);
 174        if (IS_ERR(rl))
 175                err = PTR_ERR(rl);
 176        else
 177                ni->runlist.rl = rl;
 178err_out:
        /*
         * Common exit for success and failure: a temporary context is
         * released, a caller supplied one that was reinitialized is restored.
         */
 179        if (ctx_is_temporary) {
 180                if (likely(ctx))
 181                        ntfs_attr_put_search_ctx(ctx);
 182                unmap_mft_record(base_ni);
 183        } else if (ctx_needs_reset) {
 184                /*
 185                 * If there is no attribute list, restoring the search context
 186                 * is accomplished simply by copying the saved context back over
 187                 * the caller supplied context.  If there is an attribute list,
 188                 * things are more complicated as we need to deal with mapping
 189                 * of mft records and resulting potential changes in pointers.
 190                 */
 191                if (NInoAttrList(base_ni)) {
 192                        /*
 193                         * If the currently mapped (extent) inode is not the
 194                         * one we had before, we need to unmap it and map the
 195                         * old one.
 196                         */
 197                        if (ctx->ntfs_ino != old_ctx.ntfs_ino) {
 198                                /*
 199                                 * If the currently mapped inode is not the
 200                                 * base inode, unmap it.
 201                                 */
 202                                if (ctx->base_ntfs_ino && ctx->ntfs_ino !=
 203                                                ctx->base_ntfs_ino) {
 204                                        unmap_extent_mft_record(ctx->ntfs_ino);
 205                                        ctx->mrec = ctx->base_mrec;
 206                                        BUG_ON(!ctx->mrec);
 207                                }
 208                                /*
 209                                 * If the old mapped inode is not the base
 210                                 * inode, map it.
 211                                 */
 212                                if (old_ctx.base_ntfs_ino &&
 213                                                old_ctx.ntfs_ino !=
 214                                                old_ctx.base_ntfs_ino) {
 215retry_map:
 216                                        ctx->mrec = map_mft_record(
 217                                                        old_ctx.ntfs_ino);
 218                                        /*
 219                                         * Something bad has happened.  If out
 220                                         * of memory retry till it succeeds.
 221                                         * Any other errors are fatal and we
 222                                         * return the error code in ctx->mrec.
 223                                         * Let the caller deal with it...  We
 224                                         * just need to fudge things so the
 225                                         * caller can reinit and/or put the
 226                                         * search context safely.
 227                                         */
 228                                        if (IS_ERR(ctx->mrec)) {
 229                                                if (PTR_ERR(ctx->mrec) ==
 230                                                                -ENOMEM) {
 231                                                        schedule();
 232                                                        goto retry_map;
 233                                                } else
 234                                                        old_ctx.ntfs_ino =
 235                                                                old_ctx.
 236                                                                base_ntfs_ino;
 237                                        }
 238                                }
 239                        }
 240                        /* Update the changed pointers in the saved context. */
 241                        if (ctx->mrec != old_ctx.mrec) {
                                /*
                                 * Keep the saved attribute at the same byte
                                 * offset, but relative to the new ctx->mrec.
                                 * Skipped when ctx->mrec holds an error.
                                 */
 242                                if (!IS_ERR(ctx->mrec))
 243                                        old_ctx.attr = (ATTR_RECORD*)(
 244                                                        (u8*)ctx->mrec +
 245                                                        ((u8*)old_ctx.attr -
 246                                                        (u8*)old_ctx.mrec));
 247                                old_ctx.mrec = ctx->mrec;
 248                        }
 249                }
 250                /* Restore the search context to the saved one. */
 251                *ctx = old_ctx;
 252                /*
 253                 * We drop the reference on the page we took earlier.  In the
 254                 * case that IS_ERR(ctx->mrec) is true this means we might lose
 255                 * some changes to the mft record that had been made between
 256                 * the last time it was marked dirty/written out and now.  This
 257                 * at this stage is not a problem as the mapping error is fatal
 258                 * enough that the mft record cannot be written out anyway and
 259                 * the caller is very likely to shutdown the whole inode
 260                 * immediately and mark the volume dirty for chkdsk to pick up
 261                 * the pieces anyway.
 262                 */
 263                if (put_this_page)
 264                        put_page(put_this_page);
 265        }
 266        return err;
 267}
 268
 269/**
 270 * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
 271 * @ni:         ntfs inode for which to map (part of) a runlist
 272 * @vcn:        map runlist part containing this vcn
 273 *
 274 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
 275 *
 276 * Return 0 on success and -errno on error.  There is one special error code
 277 * which is not an error as such.  This is -ENOENT.  It means that @vcn is out
 278 * of bounds of the runlist.
 279 *
 280 * Locking: - The runlist must be unlocked on entry and is unlocked on return.
 281 *          - This function takes the runlist lock for writing and may modify
 282 *            the runlist.
 283 */
 284int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 285{
 286        int err = 0;
 287
 288        down_write(&ni->runlist.lock);
 289        /* Make sure someone else didn't do the work while we were sleeping. */
 290        if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
 291                        LCN_RL_NOT_MAPPED))
 292                err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 293        up_write(&ni->runlist.lock);
 294        return err;
 295}
 296
 297/**
 298 * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
 299 * @ni:                 ntfs inode of the attribute whose runlist to search
 300 * @vcn:                vcn to convert
 301 * @write_locked:       true if the runlist is locked for writing
 302 *
 303 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
 304 * described by the ntfs inode @ni and return the corresponding logical cluster
 305 * number (lcn).
 306 *
 307 * If the @vcn is not mapped yet, the attempt is made to map the attribute
 308 * extent containing the @vcn and the vcn to lcn conversion is retried.
 309 *
 310 * If @write_locked is true the caller has locked the runlist for writing and
 311 * if false for reading.
 312 *
 313 * Since lcns must be >= 0, we use negative return codes with special meaning:
 314 *
 315 * Return code  Meaning / Description
 316 * ==========================================
 317 *  LCN_HOLE    Hole / not allocated on disk.
 318 *  LCN_ENOENT  There is no such vcn in the runlist, i.e. @vcn is out of bounds.
 319 *  LCN_ENOMEM  Not enough memory to map runlist.
 320 *  LCN_EIO     Critical error (runlist/file is corrupt, i/o error, etc).
 321 *
 322 * Locking: - The runlist must be locked on entry and is left locked on return.
 323 *          - If @write_locked is 'false', i.e. the runlist is locked for reading,
 324 *            the lock may be dropped inside the function so you cannot rely on
 325 *            the runlist still being the same when this function returns.
 326 */
 327LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 328                const bool write_locked)
 329{
 330        LCN lcn;
 331        unsigned long flags;
 332        bool is_retry = false;
 333
 334        BUG_ON(!ni);
 335        ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
 336                        ni->mft_no, (unsigned long long)vcn,
 337                        write_locked ? "write" : "read");
 338        BUG_ON(!NInoNonResident(ni));
 339        BUG_ON(vcn < 0);
 340        if (!ni->runlist.rl) {
 341                read_lock_irqsave(&ni->size_lock, flags);
 342                if (!ni->allocated_size) {
 343                        read_unlock_irqrestore(&ni->size_lock, flags);
 344                        return LCN_ENOENT;
 345                }
 346                read_unlock_irqrestore(&ni->size_lock, flags);
 347        }
 348retry_remap:
 349        /* Convert vcn to lcn.  If that fails map the runlist and retry once. */
 350        lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
 351        if (likely(lcn >= LCN_HOLE)) {
 352                ntfs_debug("Done, lcn 0x%llx.", (long long)lcn);
 353                return lcn;
 354        }
 355        if (lcn != LCN_RL_NOT_MAPPED) {
 356                if (lcn != LCN_ENOENT)
 357                        lcn = LCN_EIO;
 358        } else if (!is_retry) {
 359                int err;
 360
 361                if (!write_locked) {
 362                        up_read(&ni->runlist.lock);
 363                        down_write(&ni->runlist.lock);
 364                        if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
 365                                        LCN_RL_NOT_MAPPED)) {
 366                                up_write(&ni->runlist.lock);
 367                                down_read(&ni->runlist.lock);
 368                                goto retry_remap;
 369                        }
 370                }
 371                err = ntfs_map_runlist_nolock(ni, vcn, NULL);
 372                if (!write_locked) {
 373                        up_write(&ni->runlist.lock);
 374                        down_read(&ni->runlist.lock);
 375                }
 376                if (likely(!err)) {
 377                        is_retry = true;
 378                        goto retry_remap;
 379                }
 380                if (err == -ENOENT)
 381                        lcn = LCN_ENOENT;
 382                else if (err == -ENOMEM)
 383                        lcn = LCN_ENOMEM;
 384                else
 385                        lcn = LCN_EIO;
 386        }
 387        if (lcn != LCN_ENOENT)
 388                ntfs_error(ni->vol->sb, "Failed with error code %lli.",
 389                                (long long)lcn);
 390        return lcn;
 391}
 392
 393/**
 394 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
 395 * @ni:         ntfs inode describing the runlist to search
 396 * @vcn:        vcn to find
 397 * @ctx:        active attribute search context if present or NULL if not
 398 *
 399 * Find the virtual cluster number @vcn in the runlist described by the ntfs
 400 * inode @ni and return the address of the runlist element containing the @vcn.
 401 *
 402 * If the @vcn is not mapped yet, the attempt is made to map the attribute
 403 * extent containing the @vcn and the vcn to lcn conversion is retried.
 404 *
 405 * If @ctx is specified, it is an active search context of @ni and its base mft
 406 * record.  This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
 407 * runlist fragments and allows their mapping.  If you do not have the mft
 408 * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
 409 * will perform the necessary mapping and unmapping.
 410 *
 411 * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
 412 * restores it before returning.  Thus, @ctx will be left pointing to the same
 413 * attribute on return as on entry.  However, the actual pointers in @ctx may
 414 * point to different memory locations on return, so you must remember to reset
 415 * any cached pointers from the @ctx, i.e. after the call to
 416 * ntfs_attr_find_vcn_nolock(), you will probably want to do:
 417 *      m = ctx->mrec;
 418 *      a = ctx->attr;
 419 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
 420 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
 421 * Note you need to distinguish between the lcn of the returned runlist element
 422 * being >= 0 and LCN_HOLE.  In the later case you have to return zeroes on
 423 * read and allocate clusters on write.
 424 *
 425 * Return the runlist element containing the @vcn on success and
 426 * ERR_PTR(-errno) on error.  You need to test the return value with IS_ERR()
 427 * to decide if the return is success or failure and PTR_ERR() to get to the
 428 * error code if IS_ERR() is true.
 429 *
 430 * The possible error return codes are:
 431 *      -ENOENT - No such vcn in the runlist, i.e. @vcn is out of bounds.
 432 *      -ENOMEM - Not enough memory to map runlist.
 433 *      -EIO    - Critical error (runlist/file is corrupt, i/o error, etc).
 434 *
 435 * WARNING: If @ctx is supplied, regardless of whether success or failure is
 436 *          returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
 437 *          is no longer valid, i.e. you need to either call
 438 *          ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
 439 *          In that case PTR_ERR(@ctx->mrec) will give you the error code for
 440 *          why the mapping of the old inode failed.
 441 *
 442 * Locking: - The runlist described by @ni must be locked for writing on entry
 443 *            and is locked on return.  Note the runlist may be modified when
 444 *            needed runlist fragments need to be mapped.
 445 *          - If @ctx is NULL, the base mft record of @ni must not be mapped on
 446 *            entry and it will be left unmapped on return.
 447 *          - If @ctx is not NULL, the base mft record must be mapped on entry
 448 *            and it will be left mapped on return.
 449 */
 450runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
 451                ntfs_attr_search_ctx *ctx)
 452{
 453        unsigned long flags;
 454        runlist_element *rl;
 455        int err = 0;
 456        bool is_retry = false;
 457
 458        BUG_ON(!ni);
 459        ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
 460                        ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
 461        BUG_ON(!NInoNonResident(ni));
 462        BUG_ON(vcn < 0);
 463        if (!ni->runlist.rl) {
 464                read_lock_irqsave(&ni->size_lock, flags);
 465                if (!ni->allocated_size) {
 466                        read_unlock_irqrestore(&ni->size_lock, flags);
 467                        return ERR_PTR(-ENOENT);
 468                }
 469                read_unlock_irqrestore(&ni->size_lock, flags);
 470        }
 471retry_remap:
 472        rl = ni->runlist.rl;
 473        if (likely(rl && vcn >= rl[0].vcn)) {
 474                while (likely(rl->length)) {
 475                        if (unlikely(vcn < rl[1].vcn)) {
 476                                if (likely(rl->lcn >= LCN_HOLE)) {
 477                                        ntfs_debug("Done.");
 478                                        return rl;
 479                                }
 480                                break;
 481                        }
 482                        rl++;
 483                }
 484                if (likely(rl->lcn != LCN_RL_NOT_MAPPED)) {
 485                        if (likely(rl->lcn == LCN_ENOENT))
 486                                err = -ENOENT;
 487                        else
 488                                err = -EIO;
 489                }
 490        }
 491        if (!err && !is_retry) {
 492                /*
 493                 * If the search context is invalid we cannot map the unmapped
 494                 * region.
 495                 */
 496                if (IS_ERR(ctx->mrec))
 497                        err = PTR_ERR(ctx->mrec);
 498                else {
 499                        /*
 500                         * The @vcn is in an unmapped region, map the runlist
 501                         * and retry.
 502                         */
 503                        err = ntfs_map_runlist_nolock(ni, vcn, ctx);
 504                        if (likely(!err)) {
 505                                is_retry = true;
 506                                goto retry_remap;
 507                        }
 508                }
 509                if (err == -EINVAL)
 510                        err = -EIO;
 511        } else if (!err)
 512                err = -EIO;
 513        if (err != -ENOENT)
 514                ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
 515        return ERR_PTR(err);
 516}
 517
 518/**
 519 * ntfs_attr_find - find (next) attribute in mft record
 520 * @type:       attribute type to find
 521 * @name:       attribute name to find (optional, i.e. NULL means don't care)
 522 * @name_len:   attribute name length (only needed if @name present)
 523 * @ic:         IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
 524 * @val:        attribute value to find (optional, resident attributes only)
 525 * @val_len:    attribute value length
 526 * @ctx:        search context with mft record and attribute to search from
 527 *
 528 * You should not need to call this function directly.  Use ntfs_attr_lookup()
 529 * instead.
 530 *
 531 * ntfs_attr_find() takes a search context @ctx as parameter and searches the
 532 * mft record specified by @ctx->mrec, beginning at @ctx->attr, for an
 533 * attribute of @type, optionally @name and @val.
 534 *
 535 * If the attribute is found, ntfs_attr_find() returns 0 and @ctx->attr will
 536 * point to the found attribute.
 537 *
 538 * If the attribute is not found, ntfs_attr_find() returns -ENOENT and
 539 * @ctx->attr will point to the attribute before which the attribute being
 540 * searched for would need to be inserted if such an action were to be desired.
 541 *
 542 * On actual error, ntfs_attr_find() returns -EIO.  In this case @ctx->attr is
 543 * undefined and in particular do not rely on it not changing.
 544 *
 545 * If @ctx->is_first is 'true', the search begins with @ctx->attr itself.  If it
 546 * is 'false', the search begins after @ctx->attr.
 547 *
  548 * If @ic is IGNORE_CASE, the @name comparison is not case sensitive and
 549 * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record
 550 * @ctx->mrec belongs.  This is so we can get at the ntfs volume and hence at
 551 * the upcase table.  If @ic is CASE_SENSITIVE, the comparison is case
 552 * sensitive.  When @name is present, @name_len is the @name length in Unicode
 553 * characters.
 554 *
 555 * If @name is not present (NULL), we assume that the unnamed attribute is
 556 * being searched for.
 557 *
 558 * Finally, the resident attribute value @val is looked for, if present.  If
 559 * @val is not present (NULL), @val_len is ignored.
 560 *
 561 * ntfs_attr_find() only searches the specified mft record and it ignores the
 562 * presence of an attribute list attribute (unless it is the one being searched
 563 * for, obviously).  If you need to take attribute lists into consideration,
 564 * use ntfs_attr_lookup() instead (see below).  This also means that you cannot
 565 * use ntfs_attr_find() to search for extent records of non-resident
 566 * attributes, as extents with lowest_vcn != 0 are usually described by the
 567 * attribute list attribute only. - Note that it is possible that the first
 568 * extent is only in the attribute list while the last extent is in the base
 569 * mft record, so do not rely on being able to find the first extent in the
 570 * base mft record.
 571 *
 572 * Warning: Never use @val when looking for attribute types which can be
 573 *          non-resident as this most likely will result in a crash!
 574 */
 575static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
 576                const u32 name_len, const IGNORE_CASE_BOOL ic,
 577                const u8 *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
 578{
 579        ATTR_RECORD *a;
 580        ntfs_volume *vol = ctx->ntfs_ino->vol;
 581        ntfschar *upcase = vol->upcase;
 582        u32 upcase_len = vol->upcase_len;
 583
 584        /*
 585         * Iterate over attributes in mft record starting at @ctx->attr, or the
 586         * attribute following that, if @ctx->is_first is 'true'.
 587         */
 588        if (ctx->is_first) {
 589                a = ctx->attr;
 590                ctx->is_first = false;
 591        } else
 592                a = (ATTR_RECORD*)((u8*)ctx->attr +
 593                                le32_to_cpu(ctx->attr->length));
 594        for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
 595                if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
 596                                le32_to_cpu(ctx->mrec->bytes_allocated))
 597                        break;
 598                ctx->attr = a;
 599                if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
 600                                a->type == AT_END))
 601                        return -ENOENT;
 602                if (unlikely(!a->length))
 603                        break;
 604                if (a->type != type)
 605                        continue;
 606                /*
 607                 * If @name is present, compare the two names.  If @name is
 608                 * missing, assume we want an unnamed attribute.
 609                 */
 610                if (!name) {
 611                        /* The search failed if the found attribute is named. */
 612                        if (a->name_length)
 613                                return -ENOENT;
 614                } else if (!ntfs_are_names_equal(name, name_len,
 615                            (ntfschar*)((u8*)a + le16_to_cpu(a->name_offset)),
 616                            a->name_length, ic, upcase, upcase_len)) {
 617                        register int rc;
 618
 619                        rc = ntfs_collate_names(name, name_len,
 620                                        (ntfschar*)((u8*)a +
 621                                        le16_to_cpu(a->name_offset)),
 622                                        a->name_length, 1, IGNORE_CASE,
 623                                        upcase, upcase_len);
 624                        /*
 625                         * If @name collates before a->name, there is no
 626                         * matching attribute.
 627                         */
 628                        if (rc == -1)
 629                                return -ENOENT;
 630                        /* If the strings are not equal, continue search. */
 631                        if (rc)
 632                                continue;
 633                        rc = ntfs_collate_names(name, name_len,
 634                                        (ntfschar*)((u8*)a +
 635                                        le16_to_cpu(a->name_offset)),
 636                                        a->name_length, 1, CASE_SENSITIVE,
 637                                        upcase, upcase_len);
 638                        if (rc == -1)
 639                                return -ENOENT;
 640                        if (rc)
 641                                continue;
 642                }
 643                /*
 644                 * The names match or @name not present and attribute is
 645                 * unnamed.  If no @val specified, we have found the attribute
 646                 * and are done.
 647                 */
 648                if (!val)
 649                        return 0;
 650                /* @val is present; compare values. */
 651                else {
 652                        register int rc;
 653
 654                        rc = memcmp(val, (u8*)a + le16_to_cpu(
 655                                        a->data.resident.value_offset),
 656                                        min_t(u32, val_len, le32_to_cpu(
 657                                        a->data.resident.value_length)));
 658                        /*
 659                         * If @val collates before the current attribute's
 660                         * value, there is no matching attribute.
 661                         */
 662                        if (!rc) {
 663                                register u32 avl;
 664
 665                                avl = le32_to_cpu(
 666                                                a->data.resident.value_length);
 667                                if (val_len == avl)
 668                                        return 0;
 669                                if (val_len < avl)
 670                                        return -ENOENT;
 671                        } else if (rc < 0)
 672                                return -ENOENT;
 673                }
 674        }
 675        ntfs_error(vol->sb, "Inode is corrupt.  Run chkdsk.");
 676        NVolSetErrors(vol);
 677        return -EIO;
 678}
 679
 680/**
 681 * load_attribute_list - load an attribute list into memory
 682 * @vol:                ntfs volume from which to read
 683 * @runlist:            runlist of the attribute list
 684 * @al_start:           destination buffer
 685 * @size:               size of the destination buffer in bytes
 686 * @initialized_size:   initialized size of the attribute list
 687 *
 688 * Walk the runlist @runlist and load all clusters from it copying them into
 689 * the linear buffer @al. The maximum number of bytes copied to @al is @size
 690 * bytes. Note, @size does not need to be a multiple of the cluster size. If
 691 * @initialized_size is less than @size, the region in @al between
 692 * @initialized_size and @size will be zeroed and not read from disk.
 693 *
 694 * Return 0 on success or -errno on error.
 695 */
 696int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
 697                const s64 size, const s64 initialized_size)
 698{
 699        LCN lcn;
 700        u8 *al = al_start;
 701        u8 *al_end = al + initialized_size;
 702        runlist_element *rl;
 703        struct buffer_head *bh;
 704        struct super_block *sb;
 705        unsigned long block_size;
 706        unsigned long block, max_block;
 707        int err = 0;
 708        unsigned char block_size_bits;
 709
 710        ntfs_debug("Entering.");
 711        if (!vol || !runlist || !al || size <= 0 || initialized_size < 0 ||
 712                        initialized_size > size)
 713                return -EINVAL;
 714        if (!initialized_size) {
 715                memset(al, 0, size);
 716                return 0;
 717        }
 718        sb = vol->sb;
 719        block_size = sb->s_blocksize;
 720        block_size_bits = sb->s_blocksize_bits;
 721        down_read(&runlist->lock);
 722        rl = runlist->rl;
 723        if (!rl) {
 724                ntfs_error(sb, "Cannot read attribute list since runlist is "
 725                                "missing.");
 726                goto err_out;   
 727        }
 728        /* Read all clusters specified by the runlist one run at a time. */
 729        while (rl->length) {
 730                lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
 731                ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
 732                                (unsigned long long)rl->vcn,
 733                                (unsigned long long)lcn);
 734                /* The attribute list cannot be sparse. */
 735                if (lcn < 0) {
 736                        ntfs_error(sb, "ntfs_rl_vcn_to_lcn() failed.  Cannot "
 737                                        "read attribute list.");
 738                        goto err_out;
 739                }
 740                block = lcn << vol->cluster_size_bits >> block_size_bits;
 741                /* Read the run from device in chunks of block_size bytes. */
 742                max_block = block + (rl->length << vol->cluster_size_bits >>
 743                                block_size_bits);
 744                ntfs_debug("max_block = 0x%lx.", max_block);
 745                do {
 746                        ntfs_debug("Reading block = 0x%lx.", block);
 747                        bh = sb_bread(sb, block);
 748                        if (!bh) {
 749                                ntfs_error(sb, "sb_bread() failed. Cannot "
 750                                                "read attribute list.");
 751                                goto err_out;
 752                        }
 753                        if (al + block_size >= al_end)
 754                                goto do_final;
 755                        memcpy(al, bh->b_data, block_size);
 756                        brelse(bh);
 757                        al += block_size;
 758                } while (++block < max_block);
 759                rl++;
 760        }
 761        if (initialized_size < size) {
 762initialize:
 763                memset(al_start + initialized_size, 0, size - initialized_size);
 764        }
 765done:
 766        up_read(&runlist->lock);
 767        return err;
 768do_final:
 769        if (al < al_end) {
 770                /*
 771                 * Partial block.
 772                 *
 773                 * Note: The attribute list can be smaller than its allocation
 774                 * by multiple clusters.  This has been encountered by at least
 775                 * two people running Windows XP, thus we cannot do any
 776                 * truncation sanity checking here. (AIA)
 777                 */
 778                memcpy(al, bh->b_data, al_end - al);
 779                brelse(bh);
 780                if (initialized_size < size)
 781                        goto initialize;
 782                goto done;
 783        }
 784        brelse(bh);
 785        /* Real overflow! */
 786        ntfs_error(sb, "Attribute list buffer overflow. Read attribute list "
 787                        "is truncated.");
 788err_out:
 789        err = -EIO;
 790        goto done;
 791}
 792
 793/**
 794 * ntfs_external_attr_find - find an attribute in the attribute list of an inode
 795 * @type:       attribute type to find
 796 * @name:       attribute name to find (optional, i.e. NULL means don't care)
 797 * @name_len:   attribute name length (only needed if @name present)
 798 * @ic:         IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
 799 * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only)
 800 * @val:        attribute value to find (optional, resident attributes only)
 801 * @val_len:    attribute value length
 802 * @ctx:        search context with mft record and attribute to search from
 803 *
 804 * You should not need to call this function directly.  Use ntfs_attr_lookup()
 805 * instead.
 806 *
 807 * Find an attribute by searching the attribute list for the corresponding
 808 * attribute list entry.  Having found the entry, map the mft record if the
 809 * attribute is in a different mft record/inode, ntfs_attr_find() the attribute
 810 * in there and return it.
 811 *
 812 * On first search @ctx->ntfs_ino must be the base mft record and @ctx must
 813 * have been obtained from a call to ntfs_attr_get_search_ctx().  On subsequent
 814 * calls @ctx->ntfs_ino can be any extent inode, too (@ctx->base_ntfs_ino is
 815 * then the base inode).
 816 *
 817 * After finishing with the attribute/mft record you need to call
 818 * ntfs_attr_put_search_ctx() to cleanup the search context (unmapping any
 819 * mapped inodes, etc).
 820 *
 821 * If the attribute is found, ntfs_external_attr_find() returns 0 and
 822 * @ctx->attr will point to the found attribute.  @ctx->mrec will point to the
 823 * mft record in which @ctx->attr is located and @ctx->al_entry will point to
 824 * the attribute list entry for the attribute.
 825 *
 826 * If the attribute is not found, ntfs_external_attr_find() returns -ENOENT and
 827 * @ctx->attr will point to the attribute in the base mft record before which
 828 * the attribute being searched for would need to be inserted if such an action
 829 * were to be desired.  @ctx->mrec will point to the mft record in which
 830 * @ctx->attr is located and @ctx->al_entry will point to the attribute list
 831 * entry of the attribute before which the attribute being searched for would
 832 * need to be inserted if such an action were to be desired.
 833 *
 834 * Thus to insert the not found attribute, one wants to add the attribute to
 835 * @ctx->mrec (the base mft record) and if there is not enough space, the
 836 * attribute should be placed in a newly allocated extent mft record.  The
 837 * attribute list entry for the inserted attribute should be inserted in the
 838 * attribute list attribute at @ctx->al_entry.
 839 *
 840 * On actual error, ntfs_external_attr_find() returns -EIO.  In this case
 841 * @ctx->attr is undefined and in particular do not rely on it not changing.
 842 */
 843static int ntfs_external_attr_find(const ATTR_TYPE type,
 844                const ntfschar *name, const u32 name_len,
 845                const IGNORE_CASE_BOOL ic, const VCN lowest_vcn,
 846                const u8 *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
 847{
 848        ntfs_inode *base_ni, *ni;
 849        ntfs_volume *vol;
 850        ATTR_LIST_ENTRY *al_entry, *next_al_entry;
 851        u8 *al_start, *al_end;
 852        ATTR_RECORD *a;
 853        ntfschar *al_name;
 854        u32 al_name_len;
 855        int err = 0;
 856        static const char *es = " Unmount and run chkdsk.";
 857
 858        ni = ctx->ntfs_ino;
 859        base_ni = ctx->base_ntfs_ino;
 860        ntfs_debug("Entering for inode 0x%lx, type 0x%x.", ni->mft_no, type);
 861        if (!base_ni) {
 862                /* First call happens with the base mft record. */
 863                base_ni = ctx->base_ntfs_ino = ctx->ntfs_ino;
 864                ctx->base_mrec = ctx->mrec;
 865        }
 866        if (ni == base_ni)
 867                ctx->base_attr = ctx->attr;
 868        if (type == AT_END)
 869                goto not_found;
 870        vol = base_ni->vol;
 871        al_start = base_ni->attr_list;
 872        al_end = al_start + base_ni->attr_list_size;
 873        if (!ctx->al_entry)
 874                ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
 875        /*
 876         * Iterate over entries in attribute list starting at @ctx->al_entry,
 877         * or the entry following that, if @ctx->is_first is 'true'.
 878         */
 879        if (ctx->is_first) {
 880                al_entry = ctx->al_entry;
 881                ctx->is_first = false;
 882        } else
 883                al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
 884                                le16_to_cpu(ctx->al_entry->length));
 885        for (;; al_entry = next_al_entry) {
 886                /* Out of bounds check. */
 887                if ((u8*)al_entry < base_ni->attr_list ||
 888                                (u8*)al_entry > al_end)
 889                        break;  /* Inode is corrupt. */
 890                ctx->al_entry = al_entry;
 891                /* Catch the end of the attribute list. */
 892                if ((u8*)al_entry == al_end)
 893                        goto not_found;
 894                if (!al_entry->length)
 895                        break;
 896                if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
 897                                le16_to_cpu(al_entry->length) > al_end)
 898                        break;
 899                next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
 900                                le16_to_cpu(al_entry->length));
 901                if (le32_to_cpu(al_entry->type) > le32_to_cpu(type))
 902                        goto not_found;
 903                if (type != al_entry->type)
 904                        continue;
 905                /*
 906                 * If @name is present, compare the two names.  If @name is
 907                 * missing, assume we want an unnamed attribute.
 908                 */
 909                al_name_len = al_entry->name_length;
 910                al_name = (ntfschar*)((u8*)al_entry + al_entry->name_offset);
 911                if (!name) {
 912                        if (al_name_len)
 913                                goto not_found;
 914                } else if (!ntfs_are_names_equal(al_name, al_name_len, name,
 915                                name_len, ic, vol->upcase, vol->upcase_len)) {
 916                        register int rc;
 917
 918                        rc = ntfs_collate_names(name, name_len, al_name,
 919                                        al_name_len, 1, IGNORE_CASE,
 920                                        vol->upcase, vol->upcase_len);
 921                        /*
 922                         * If @name collates before al_name, there is no
 923                         * matching attribute.
 924                         */
 925                        if (rc == -1)
 926                                goto not_found;
 927                        /* If the strings are not equal, continue search. */
 928                        if (rc)
 929                                continue;
 930                        /*
 931                         * FIXME: Reverse engineering showed 0, IGNORE_CASE but
 932                         * that is inconsistent with ntfs_attr_find().  The
 933                         * subsequent rc checks were also different.  Perhaps I
 934                         * made a mistake in one of the two.  Need to recheck
 935                         * which is correct or at least see what is going on...
 936                         * (AIA)
 937                         */
 938                        rc = ntfs_collate_names(name, name_len, al_name,
 939                                        al_name_len, 1, CASE_SENSITIVE,
 940                                        vol->upcase, vol->upcase_len);
 941                        if (rc == -1)
 942                                goto not_found;
 943                        if (rc)
 944                                continue;
 945                }
 946                /*
 947                 * The names match or @name not present and attribute is
 948                 * unnamed.  Now check @lowest_vcn.  Continue search if the
 949                 * next attribute list entry still fits @lowest_vcn.  Otherwise
 950                 * we have reached the right one or the search has failed.
 951                 */
 952                if (lowest_vcn && (u8*)next_al_entry >= al_start            &&
 953                                (u8*)next_al_entry + 6 < al_end             &&
 954                                (u8*)next_al_entry + le16_to_cpu(
 955                                        next_al_entry->length) <= al_end    &&
 956                                sle64_to_cpu(next_al_entry->lowest_vcn) <=
 957                                        lowest_vcn                          &&
 958                                next_al_entry->type == al_entry->type       &&
 959                                next_al_entry->name_length == al_name_len   &&
 960                                ntfs_are_names_equal((ntfschar*)((u8*)
 961                                        next_al_entry +
 962                                        next_al_entry->name_offset),
 963                                        next_al_entry->name_length,
 964                                        al_name, al_name_len, CASE_SENSITIVE,
 965                                        vol->upcase, vol->upcase_len))
 966                        continue;
 967                if (MREF_LE(al_entry->mft_reference) == ni->mft_no) {
 968                        if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) {
 969                                ntfs_error(vol->sb, "Found stale mft "
 970                                                "reference in attribute list "
 971                                                "of base inode 0x%lx.%s",
 972                                                base_ni->mft_no, es);
 973                                err = -EIO;
 974                                break;
 975                        }
 976                } else { /* Mft references do not match. */
 977                        /* If there is a mapped record unmap it first. */
 978                        if (ni != base_ni)
 979                                unmap_extent_mft_record(ni);
 980                        /* Do we want the base record back? */
 981                        if (MREF_LE(al_entry->mft_reference) ==
 982                                        base_ni->mft_no) {
 983                                ni = ctx->ntfs_ino = base_ni;
 984                                ctx->mrec = ctx->base_mrec;
 985                        } else {
 986                                /* We want an extent record. */
 987                                ctx->mrec = map_extent_mft_record(base_ni,
 988                                                le64_to_cpu(
 989                                                al_entry->mft_reference), &ni);
 990                                if (IS_ERR(ctx->mrec)) {
 991                                        ntfs_error(vol->sb, "Failed to map "
 992                                                        "extent mft record "
 993                                                        "0x%lx of base inode "
 994                                                        "0x%lx.%s",
 995                                                        MREF_LE(al_entry->
 996                                                        mft_reference),
 997                                                        base_ni->mft_no, es);
 998                                        err = PTR_ERR(ctx->mrec);
 999                                        if (err == -ENOENT)
1000                                                err = -EIO;
1001                                        /* Cause @ctx to be sanitized below. */
1002                                        ni = NULL;
1003                                        break;
1004                                }
1005                                ctx->ntfs_ino = ni;
1006                        }
1007                        ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
1008                                        le16_to_cpu(ctx->mrec->attrs_offset));
1009                }
1010                /*
1011                 * ctx->vfs_ino, ctx->mrec, and ctx->attr now point to the
1012                 * mft record containing the attribute represented by the
1013                 * current al_entry.
1014                 */
1015                /*
1016                 * We could call into ntfs_attr_find() to find the right
1017                 * attribute in this mft record but this would be less
1018                 * efficient and not quite accurate as ntfs_attr_find() ignores
1019                 * the attribute instance numbers for example which become
1020                 * important when one plays with attribute lists.  Also,
1021                 * because a proper match has been found in the attribute list
1022                 * entry above, the comparison can now be optimized.  So it is
1023                 * worth re-implementing a simplified ntfs_attr_find() here.
1024                 */
1025                a = ctx->attr;
1026                /*
1027                 * Use a manual loop so we can still use break and continue
1028                 * with the same meanings as above.
1029                 */
1030do_next_attr_loop:
1031                if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
1032                                le32_to_cpu(ctx->mrec->bytes_allocated))
1033                        break;
1034                if (a->type == AT_END)
1035                        break;
1036                if (!a->length)
1037                        break;
1038                if (al_entry->instance != a->instance)
1039                        goto do_next_attr;
1040                /*
1041                 * If the type and/or the name are mismatched between the
1042                 * attribute list entry and the attribute record, there is
1043                 * corruption so we break and return error EIO.
1044                 */
1045                if (al_entry->type != a->type)
1046                        break;
1047                if (!ntfs_are_names_equal((ntfschar*)((u8*)a +
1048                                le16_to_cpu(a->name_offset)), a->name_length,
1049                                al_name, al_name_len, CASE_SENSITIVE,
1050                                vol->upcase, vol->upcase_len))
1051                        break;
1052                ctx->attr = a;
1053                /*
1054                 * If no @val specified or @val specified and it matches, we
1055                 * have found it!
1056                 */
1057                if (!val || (!a->non_resident && le32_to_cpu(
1058                                a->data.resident.value_length) == val_len &&
1059                                !memcmp((u8*)a +
1060                                le16_to_cpu(a->data.resident.value_offset),
1061                                val, val_len))) {
1062                        ntfs_debug("Done, found.");
1063                        return 0;
1064                }
1065do_next_attr:
1066                /* Proceed to the next attribute in the current mft record. */
1067                a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
1068                goto do_next_attr_loop;
1069        }
1070        if (!err) {
1071                ntfs_error(vol->sb, "Base inode 0x%lx contains corrupt "
1072                                "attribute list attribute.%s", base_ni->mft_no,
1073                                es);
1074                err = -EIO;
1075        }
1076        if (ni != base_ni) {
1077                if (ni)
1078                        unmap_extent_mft_record(ni);
1079                ctx->ntfs_ino = base_ni;
1080                ctx->mrec = ctx->base_mrec;
1081                ctx->attr = ctx->base_attr;
1082        }
1083        if (err != -ENOMEM)
1084                NVolSetErrors(vol);
1085        return err;
1086not_found:
1087        /*
1088         * If we were looking for AT_END, we reset the search context @ctx and
1089         * use ntfs_attr_find() to seek to the end of the base mft record.
1090         */
1091        if (type == AT_END) {
1092                ntfs_attr_reinit_search_ctx(ctx);
1093                return ntfs_attr_find(AT_END, name, name_len, ic, val, val_len,
1094                                ctx);
1095        }
1096        /*
1097         * The attribute was not found.  Before we return, we want to ensure
1098         * @ctx->mrec and @ctx->attr indicate the position at which the
1099         * attribute should be inserted in the base mft record.  Since we also
1100         * want to preserve @ctx->al_entry we cannot reinitialize the search
1101         * context using ntfs_attr_reinit_search_ctx() as this would set
1102         * @ctx->al_entry to NULL.  Thus we do the necessary bits manually (see
1103         * ntfs_attr_init_search_ctx() below).  Note, we _only_ preserve
1104         * @ctx->al_entry as the remaining fields (base_*) are identical to
1105         * their non base_ counterparts and we cannot set @ctx->base_attr
1106         * correctly yet as we do not know what @ctx->attr will be set to by
1107         * the call to ntfs_attr_find() below.
1108         */
1109        if (ni != base_ni)
1110                unmap_extent_mft_record(ni);
1111        ctx->mrec = ctx->base_mrec;
1112        ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
1113                        le16_to_cpu(ctx->mrec->attrs_offset));
1114        ctx->is_first = true;
1115        ctx->ntfs_ino = base_ni;
1116        ctx->base_ntfs_ino = NULL;
1117        ctx->base_mrec = NULL;
1118        ctx->base_attr = NULL;
1119        /*
1120         * In case there are multiple matches in the base mft record, need to
1121         * keep enumerating until we get an attribute not found response (or
1122         * another error), otherwise we would keep returning the same attribute
1123         * over and over again and all programs using us for enumeration would
1124         * lock up in a tight loop.
1125         */
1126        do {
1127                err = ntfs_attr_find(type, name, name_len, ic, val, val_len,
1128                                ctx);
1129        } while (!err);
1130        ntfs_debug("Done, not found.");
1131        return err;
1132}
1133
1134/**
1135 * ntfs_attr_lookup - find an attribute in an ntfs inode
1136 * @type:       attribute type to find
1137 * @name:       attribute name to find (optional, i.e. NULL means don't care)
1138 * @name_len:   attribute name length (only needed if @name present)
1139 * @ic:         IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
1140 * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only)
1141 * @val:        attribute value to find (optional, resident attributes only)
1142 * @val_len:    attribute value length
1143 * @ctx:        search context with mft record and attribute to search from
1144 *
1145 * Find an attribute in an ntfs inode.  On first search @ctx->ntfs_ino must
1146 * be the base mft record and @ctx must have been obtained from a call to
1147 * ntfs_attr_get_search_ctx().
1148 *
1149 * This function transparently handles attribute lists and @ctx is used to
1150 * continue searches where they were left off at.
1151 *
1152 * After finishing with the attribute/mft record you need to call
1153 * ntfs_attr_put_search_ctx() to cleanup the search context (unmapping any
1154 * mapped inodes, etc).
1155 *
1156 * Return 0 if the search was successful and -errno if not.
1157 *
1158 * When 0, @ctx->attr is the found attribute and it is in mft record
1159 * @ctx->mrec.  If an attribute list attribute is present, @ctx->al_entry is
1160 * the attribute list entry of the found attribute.
1161 *
1162 * When -ENOENT, @ctx->attr is the attribute which collates just after the
1163 * attribute being searched for, i.e. if one wants to add the attribute to the
1164 * mft record this is the correct place to insert it into.  If an attribute
1165 * list attribute is present, @ctx->al_entry is the attribute list entry which
1166 * collates just after the attribute list entry of the attribute being searched
1167 * for, i.e. if one wants to add the attribute to the mft record this is the
1168 * correct place to insert its attribute list entry into.
1169 *
1170 * When -errno != -ENOENT, an error occurred during the lookup.  @ctx->attr is
1171 * then undefined and in particular you should not rely on it not changing.
1172 */
1173int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
1174                const u32 name_len, const IGNORE_CASE_BOOL ic,
1175                const VCN lowest_vcn, const u8 *val, const u32 val_len,
1176                ntfs_attr_search_ctx *ctx)
1177{
1178        ntfs_inode *base_ni;
1179
1180        ntfs_debug("Entering.");
1181        BUG_ON(IS_ERR(ctx->mrec));
1182        if (ctx->base_ntfs_ino)
1183                base_ni = ctx->base_ntfs_ino;
1184        else
1185                base_ni = ctx->ntfs_ino;
1186        /* Sanity check, just for debugging really. */
1187        BUG_ON(!base_ni);
1188        if (!NInoAttrList(base_ni) || type == AT_ATTRIBUTE_LIST)
1189                return ntfs_attr_find(type, name, name_len, ic, val, val_len,
1190                                ctx);
1191        return ntfs_external_attr_find(type, name, name_len, ic, lowest_vcn,
1192                        val, val_len, ctx);
1193}
1194
1195/**
1196 * ntfs_attr_init_search_ctx - initialize an attribute search context
1197 * @ctx:        attribute search context to initialize
1198 * @ni:         ntfs inode with which to initialize the search context
1199 * @mrec:       mft record with which to initialize the search context
1200 *
1201 * Initialize the attribute search context @ctx with @ni and @mrec.
1202 */
1203static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx,
1204                ntfs_inode *ni, MFT_RECORD *mrec)
1205{
1206        *ctx = (ntfs_attr_search_ctx) {
1207                .mrec = mrec,
1208                /* Sanity checks are performed elsewhere. */
1209                .attr = (ATTR_RECORD*)((u8*)mrec +
1210                                le16_to_cpu(mrec->attrs_offset)),
1211                .is_first = true,
1212                .ntfs_ino = ni,
1213        };
1214}
1215
1216/**
1217 * ntfs_attr_reinit_search_ctx - reinitialize an attribute search context
1218 * @ctx:        attribute search context to reinitialize
1219 *
1220 * Reinitialize the attribute search context @ctx, unmapping an associated
1221 * extent mft record if present, and initialize the search context again.
1222 *
1223 * This is used when a search for a new attribute is being started to reset
1224 * the search context to the beginning.
1225 */
1226void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx)
1227{
1228        if (likely(!ctx->base_ntfs_ino)) {
1229                /* No attribute list. */
1230                ctx->is_first = true;
1231                /* Sanity checks are performed elsewhere. */
1232                ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
1233                                le16_to_cpu(ctx->mrec->attrs_offset));
1234                /*
1235                 * This needs resetting due to ntfs_external_attr_find() which
1236                 * can leave it set despite having zeroed ctx->base_ntfs_ino.
1237                 */
1238                ctx->al_entry = NULL;
1239                return;
1240        } /* Attribute list. */
1241        if (ctx->ntfs_ino != ctx->base_ntfs_ino)
1242                unmap_extent_mft_record(ctx->ntfs_ino);
1243        ntfs_attr_init_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec);
1244        return;
1245}
1246
1247/**
1248 * ntfs_attr_get_search_ctx - allocate/initialize a new attribute search context
1249 * @ni:         ntfs inode with which to initialize the search context
1250 * @mrec:       mft record with which to initialize the search context
1251 *
1252 * Allocate a new attribute search context, initialize it with @ni and @mrec,
1253 * and return it. Return NULL if allocation failed.
1254 */
1255ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
1256{
1257        ntfs_attr_search_ctx *ctx;
1258
1259        ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS);
1260        if (ctx)
1261                ntfs_attr_init_search_ctx(ctx, ni, mrec);
1262        return ctx;
1263}
1264
1265/**
1266 * ntfs_attr_put_search_ctx - release an attribute search context
1267 * @ctx:        attribute search context to free
1268 *
1269 * Release the attribute search context @ctx, unmapping an associated extent
1270 * mft record if present.
1271 */
1272void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx)
1273{
1274        if (ctx->base_ntfs_ino && ctx->ntfs_ino != ctx->base_ntfs_ino)
1275                unmap_extent_mft_record(ctx->ntfs_ino);
1276        kmem_cache_free(ntfs_attr_ctx_cache, ctx);
1277        return;
1278}
1279
1280#ifdef NTFS_RW
1281
1282/**
1283 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file
1284 * @vol:        ntfs volume to which the attribute belongs
1285 * @type:       attribute type which to find
1286 *
1287 * Search for the attribute definition record corresponding to the attribute
1288 * @type in the $AttrDef system file.
1289 *
1290 * Return the attribute type definition record if found and NULL if not found.
1291 */
1292static ATTR_DEF *ntfs_attr_find_in_attrdef(const ntfs_volume *vol,
1293                const ATTR_TYPE type)
1294{
1295        ATTR_DEF *ad;
1296
1297        BUG_ON(!vol->attrdef);
1298        BUG_ON(!type);
1299        for (ad = vol->attrdef; (u8*)ad - (u8*)vol->attrdef <
1300                        vol->attrdef_size && ad->type; ++ad) {
1301                /* We have not found it yet, carry on searching. */
1302                if (likely(le32_to_cpu(ad->type) < le32_to_cpu(type)))
1303                        continue;
1304                /* We found the attribute; return it. */
1305                if (likely(ad->type == type))
1306                        return ad;
1307                /* We have gone too far already.  No point in continuing. */
1308                break;
1309        }
1310        /* Attribute not found. */
1311        ntfs_debug("Attribute type 0x%x not found in $AttrDef.",
1312                        le32_to_cpu(type));
1313        return NULL;
1314}
1315
1316/**
1317 * ntfs_attr_size_bounds_check - check a size of an attribute type for validity
1318 * @vol:        ntfs volume to which the attribute belongs
1319 * @type:       attribute type which to check
1320 * @size:       size which to check
1321 *
1322 * Check whether the @size in bytes is valid for an attribute of @type on the
1323 * ntfs volume @vol.  This information is obtained from $AttrDef system file.
1324 *
1325 * Return 0 if valid, -ERANGE if not valid, or -ENOENT if the attribute is not
1326 * listed in $AttrDef.
1327 */
1328int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type,
1329                const s64 size)
1330{
1331        ATTR_DEF *ad;
1332
1333        BUG_ON(size < 0);
1334        /*
1335         * $ATTRIBUTE_LIST has a maximum size of 256kiB, but this is not
1336         * listed in $AttrDef.
1337         */
1338        if (unlikely(type == AT_ATTRIBUTE_LIST && size > 256 * 1024))
1339                return -ERANGE;
1340        /* Get the $AttrDef entry for the attribute @type. */
1341        ad = ntfs_attr_find_in_attrdef(vol, type);
1342        if (unlikely(!ad))
1343                return -ENOENT;
1344        /* Do the bounds check. */
1345        if (((sle64_to_cpu(ad->min_size) > 0) &&
1346                        size < sle64_to_cpu(ad->min_size)) ||
1347                        ((sle64_to_cpu(ad->max_size) > 0) && size >
1348                        sle64_to_cpu(ad->max_size)))
1349                return -ERANGE;
1350        return 0;
1351}
1352
1353/**
1354 * ntfs_attr_can_be_non_resident - check if an attribute can be non-resident
1355 * @vol:        ntfs volume to which the attribute belongs
1356 * @type:       attribute type which to check
1357 *
1358 * Check whether the attribute of @type on the ntfs volume @vol is allowed to
1359 * be non-resident.  This information is obtained from $AttrDef system file.
1360 *
1361 * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, and
1362 * -ENOENT if the attribute is not listed in $AttrDef.
1363 */
1364int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1365{
1366        ATTR_DEF *ad;
1367
1368        /* Find the attribute definition record in $AttrDef. */
1369        ad = ntfs_attr_find_in_attrdef(vol, type);
1370        if (unlikely(!ad))
1371                return -ENOENT;
1372        /* Check the flags and return the result. */
1373        if (ad->flags & ATTR_DEF_RESIDENT)
1374                return -EPERM;
1375        return 0;
1376}
1377
1378/**
1379 * ntfs_attr_can_be_resident - check if an attribute can be resident
1380 * @vol:        ntfs volume to which the attribute belongs
1381 * @type:       attribute type which to check
1382 *
1383 * Check whether the attribute of @type on the ntfs volume @vol is allowed to
1384 * be resident.  This information is derived from our ntfs knowledge and may
1385 * not be completely accurate, especially when user defined attributes are
1386 * present.  Basically we allow everything to be resident except for index
1387 * allocation and $EA attributes.
1388 *
1389 * Return 0 if the attribute is allowed to be non-resident and -EPERM if not.
1390 *
1391 * Warning: In the system file $MFT the attribute $Bitmap must be non-resident
1392 *          otherwise windows will not boot (blue screen of death)!  We cannot
1393 *          check for this here as we do not know which inode's $Bitmap is
1394 *          being asked about so the caller needs to special case this.
1395 */
1396int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1397{
1398        if (type == AT_INDEX_ALLOCATION)
1399                return -EPERM;
1400        return 0;
1401}
1402
1403/**
1404 * ntfs_attr_record_resize - resize an attribute record
1405 * @m:          mft record containing attribute record
1406 * @a:          attribute record to resize
1407 * @new_size:   new size in bytes to which to resize the attribute record @a
1408 *
1409 * Resize the attribute record @a, i.e. the resident part of the attribute, in
1410 * the mft record @m to @new_size bytes.
1411 *
1412 * Return 0 on success and -errno on error.  The following error codes are
1413 * defined:
1414 *      -ENOSPC - Not enough space in the mft record @m to perform the resize.
1415 *
1416 * Note: On error, no modifications have been performed whatsoever.
1417 *
1418 * Warning: If you make a record smaller without having copied all the data you
1419 *          are interested in the data may be overwritten.
1420 */
1421int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1422{
1423        ntfs_debug("Entering for new_size %u.", new_size);
1424        /* Align to 8 bytes if it is not already done. */
1425        if (new_size & 7)
1426                new_size = (new_size + 7) & ~7;
1427        /* If the actual attribute length has changed, move things around. */
1428        if (new_size != le32_to_cpu(a->length)) {
1429                u32 new_muse = le32_to_cpu(m->bytes_in_use) -
1430                                le32_to_cpu(a->length) + new_size;
1431                /* Not enough space in this mft record. */
1432                if (new_muse > le32_to_cpu(m->bytes_allocated))
1433                        return -ENOSPC;
1434                /* Move attributes following @a to their new location. */
1435                memmove((u8*)a + new_size, (u8*)a + le32_to_cpu(a->length),
1436                                le32_to_cpu(m->bytes_in_use) - ((u8*)a -
1437                                (u8*)m) - le32_to_cpu(a->length));
1438                /* Adjust @m to reflect the change in used space. */
1439                m->bytes_in_use = cpu_to_le32(new_muse);
1440                /* Adjust @a to reflect the new size. */
1441                if (new_size >= offsetof(ATTR_REC, length) + sizeof(a->length))
1442                        a->length = cpu_to_le32(new_size);
1443        }
1444        return 0;
1445}
1446
1447/**
1448 * ntfs_resident_attr_value_resize - resize the value of a resident attribute
1449 * @m:          mft record containing attribute record
1450 * @a:          attribute record whose value to resize
1451 * @new_size:   new size in bytes to which to resize the attribute value of @a
1452 *
1453 * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
1454 * If the value is made bigger, the newly allocated space is cleared.
1455 *
1456 * Return 0 on success and -errno on error.  The following error codes are
1457 * defined:
1458 *      -ENOSPC - Not enough space in the mft record @m to perform the resize.
1459 *
1460 * Note: On error, no modifications have been performed whatsoever.
1461 *
1462 * Warning: If you make a record smaller without having copied all the data you
1463 *          are interested in the data may be overwritten.
1464 */
1465int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
1466                const u32 new_size)
1467{
1468        u32 old_size;
1469
1470        /* Resize the resident part of the attribute record. */
1471        if (ntfs_attr_record_resize(m, a,
1472                        le16_to_cpu(a->data.resident.value_offset) + new_size))
1473                return -ENOSPC;
1474        /*
1475         * The resize succeeded!  If we made the attribute value bigger, clear
1476         * the area between the old size and @new_size.
1477         */
1478        old_size = le32_to_cpu(a->data.resident.value_length);
1479        if (new_size > old_size)
1480                memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
1481                                old_size, 0, new_size - old_size);
1482        /* Finally update the length of the attribute value. */
1483        a->data.resident.value_length = cpu_to_le32(new_size);
1484        return 0;
1485}
1486
1487/**
1488 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
1489 * @ni:         ntfs inode describing the attribute to convert
1490 * @data_size:  size of the resident data to copy to the non-resident attribute
1491 *
1492 * Convert the resident ntfs attribute described by the ntfs inode @ni to a
1493 * non-resident one.
1494 *
1495 * @data_size must be equal to the attribute value size.  This is needed since
1496 * we need to know the size before we can map the mft record and our callers
1497 * always know it.  The reason we cannot simply read the size from the vfs
1498 * inode i_size is that this is not necessarily uptodate.  This happens when
1499 * ntfs_attr_make_non_resident() is called in the ->truncate call path(s).
1500 *
1501 * Return 0 on success and -errno on error.  The following error return codes
1502 * are defined:
1503 *      -EPERM  - The attribute is not allowed to be non-resident.
1504 *      -ENOMEM - Not enough memory.
1505 *      -ENOSPC - Not enough disk space.
1506 *      -ENOENT - Attribute not defined on the volume.
1507 *      -EIO    - I/o error or other error.
1508 * Note that -ENOSPC is also returned in the case that there is not enough
1509 * space in the mft record to do the conversion.  This can happen when the mft
1510 * record is already very full.  The caller is responsible for trying to make
1511 * space in the mft record and trying again.  FIXME: Do we need a separate
1512 * error return code for this kind of -ENOSPC or is it always worth trying
1513 * again in case the attribute may then fit in a resident state so no need to
1514 * make it non-resident at all?  Ho-hum...  (AIA)
1515 *
1516 * NOTE to self: No changes in the attribute list are required to move from
1517 *               a resident to a non-resident attribute.
1518 *
1519 * Locking: - The caller must hold i_mutex on the inode.
1520 */
1521int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1522{
1523        s64 new_size;
1524        struct inode *vi = VFS_I(ni);
1525        ntfs_volume *vol = ni->vol;
1526        ntfs_inode *base_ni;
1527        MFT_RECORD *m;
1528        ATTR_RECORD *a;
1529        ntfs_attr_search_ctx *ctx;
1530        struct page *page;
1531        runlist_element *rl;
1532        u8 *kaddr;
1533        unsigned long flags;
1534        int mp_size, mp_ofs, name_ofs, arec_size, err, err2;
1535        u32 attr_size;
1536        u8 old_res_attr_flags;
1537
1538        /* Check that the attribute is allowed to be non-resident. */
1539        err = ntfs_attr_can_be_non_resident(vol, ni->type);
1540        if (unlikely(err)) {
1541                if (err == -EPERM)
1542                        ntfs_debug("Attribute is not allowed to be "
1543                                        "non-resident.");
1544                else
1545                        ntfs_debug("Attribute not defined on the NTFS "
1546                                        "volume!");
1547                return err;
1548        }
1549        /*
1550         * FIXME: Compressed and encrypted attributes are not supported when
1551         * writing and we should never have gotten here for them.
1552         */
1553        BUG_ON(NInoCompressed(ni));
1554        BUG_ON(NInoEncrypted(ni));
1555        /*
1556         * The size needs to be aligned to a cluster boundary for allocation
1557         * purposes.
1558         */
1559        new_size = (data_size + vol->cluster_size - 1) &
1560                        ~(vol->cluster_size - 1);
1561        if (new_size > 0) {
1562                /*
1563                 * Will need the page later and since the page lock nests
1564                 * outside all ntfs locks, we need to get the page now.
1565                 */
1566                page = find_or_create_page(vi->i_mapping, 0,
1567                                mapping_gfp_mask(vi->i_mapping));
1568                if (unlikely(!page))
1569                        return -ENOMEM;
1570                /* Start by allocating clusters to hold the attribute value. */
1571                rl = ntfs_cluster_alloc(vol, 0, new_size >>
1572                                vol->cluster_size_bits, -1, DATA_ZONE, true);
1573                if (IS_ERR(rl)) {
1574                        err = PTR_ERR(rl);
1575                        ntfs_debug("Failed to allocate cluster%s, error code "
1576                                        "%i.", (new_size >>
1577                                        vol->cluster_size_bits) > 1 ? "s" : "",
1578                                        err);
1579                        goto page_err_out;
1580                }
1581        } else {
                    /* Zero size value: no clusters to allocate and no page needed. */
1582                rl = NULL;
1583                page = NULL;
1584        }
1585        /* Determine the size of the mapping pairs array. */
1586        mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0, -1);
1587        if (unlikely(mp_size < 0)) {
1588                err = mp_size;
1589                ntfs_debug("Failed to get size for mapping pairs array, error "
1590                                "code %i.", err);
1591                goto rl_err_out;
1592        }
1593        down_write(&ni->runlist.lock);
1594        if (!NInoAttr(ni))
1595                base_ni = ni;
1596        else
1597                base_ni = ni->ext.base_ntfs_ino;
1598        m = map_mft_record(base_ni);
1599        if (IS_ERR(m)) {
1600                err = PTR_ERR(m);
                    /* err_out tests @m and @ctx to decide what to release. */
1601                m = NULL;
1602                ctx = NULL;
1603                goto err_out;
1604        }
1605        ctx = ntfs_attr_get_search_ctx(base_ni, m);
1606        if (unlikely(!ctx)) {
1607                err = -ENOMEM;
1608                goto err_out;
1609        }
1610        err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1611                        CASE_SENSITIVE, 0, NULL, 0, ctx);
1612        if (unlikely(err)) {
                    /* A missing attribute is reported to the caller as -EIO. */
1613                if (err == -ENOENT)
1614                        err = -EIO;
1615                goto err_out;
1616        }
1617        m = ctx->mrec;
1618        a = ctx->attr;
1619        BUG_ON(NInoNonResident(ni));
1620        BUG_ON(a->non_resident);
1621        /*
1622         * Calculate new offsets for the name and the mapping pairs array.
1623         */
1624        if (NInoSparse(ni) || NInoCompressed(ni))
1625                name_ofs = (offsetof(ATTR_REC,
1626                                data.non_resident.compressed_size) +
1627                                sizeof(a->data.non_resident.compressed_size) +
1628                                7) & ~7;
1629        else
1630                name_ofs = (offsetof(ATTR_REC,
1631                                data.non_resident.compressed_size) + 7) & ~7;
1632        mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1633        /*
1634         * Determine the size of the resident part of the now non-resident
1635         * attribute record.
1636         */
1637        arec_size = (mp_ofs + mp_size + 7) & ~7;
1638        /*
1639         * If the page is not uptodate bring it uptodate by copying from the
1640         * attribute value.
1641         */
1642        attr_size = le32_to_cpu(a->data.resident.value_length);
1643        BUG_ON(attr_size != data_size);
1644        if (page && !PageUptodate(page)) {
1645                kaddr = kmap_atomic(page);
1646                memcpy(kaddr, (u8*)a +
1647                                le16_to_cpu(a->data.resident.value_offset),
1648                                attr_size);
1649                memset(kaddr + attr_size, 0, PAGE_SIZE - attr_size);
1650                kunmap_atomic(kaddr);
1651                flush_dcache_page(page);
1652                SetPageUptodate(page);
1653        }
1654        /* Backup the attribute flag. */
1655        old_res_attr_flags = a->data.resident.flags;
1656        /* Resize the resident part of the attribute record. */
1657        err = ntfs_attr_record_resize(m, a, arec_size);
1658        if (unlikely(err))
1659                goto err_out;
1660        /*
1661         * Convert the resident part of the attribute record to describe a
1662         * non-resident attribute.
1663         */
1664        a->non_resident = 1;
1665        /* Move the attribute name if it exists and update the offset. */
1666        if (a->name_length)
1667                memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1668                                a->name_length * sizeof(ntfschar));
1669        a->name_offset = cpu_to_le16(name_ofs);
1670        /* Setup the fields specific to non-resident attributes. */
1671        a->data.non_resident.lowest_vcn = 0;
            /*
             * NOTE(review): for new_size == 0 this shifts -1; presumably the
             * result is meant to be -1 (no clusters) - confirm.
             */
1672        a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
1673                        vol->cluster_size_bits);
1674        a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
1675        memset(&a->data.non_resident.reserved, 0,
1676                        sizeof(a->data.non_resident.reserved));
1677        a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
1678        a->data.non_resident.data_size =
1679                        a->data.non_resident.initialized_size =
1680                        cpu_to_sle64(attr_size);
1681        if (NInoSparse(ni) || NInoCompressed(ni)) {
1682                a->data.non_resident.compression_unit = 0;
1683                if (NInoCompressed(ni) || vol->major_ver < 3)
1684                        a->data.non_resident.compression_unit = 4;
1685                a->data.non_resident.compressed_size =
1686                                a->data.non_resident.allocated_size;
1687        } else
1688                a->data.non_resident.compression_unit = 0;
1689        /* Generate the mapping pairs array into the attribute record. */
1690        err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
1691                        arec_size - mp_ofs, rl, 0, -1, NULL);
1692        if (unlikely(err)) {
1693                ntfs_debug("Failed to build mapping pairs, error code %i.",
1694                                err);
1695                goto undo_err_out;
1696        }
1697        /* Setup the in-memory attribute structure to be non-resident. */
1698        ni->runlist.rl = rl;
1699        write_lock_irqsave(&ni->size_lock, flags);
1700        ni->allocated_size = new_size;
1701        if (NInoSparse(ni) || NInoCompressed(ni)) {
1702                ni->itype.compressed.size = ni->allocated_size;
1703                if (a->data.non_resident.compression_unit) {
1704                        ni->itype.compressed.block_size = 1U << (a->data.
1705                                        non_resident.compression_unit +
1706                                        vol->cluster_size_bits);
1707                        ni->itype.compressed.block_size_bits =
1708                                        ffs(ni->itype.compressed.block_size) -
1709                                        1;
1710                        ni->itype.compressed.block_clusters = 1U <<
1711                                        a->data.non_resident.compression_unit;
1712                } else {
1713                        ni->itype.compressed.block_size = 0;
1714                        ni->itype.compressed.block_size_bits = 0;
1715                        ni->itype.compressed.block_clusters = 0;
1716                }
1717                vi->i_blocks = ni->itype.compressed.size >> 9;
1718        } else
1719                vi->i_blocks = ni->allocated_size >> 9;
1720        write_unlock_irqrestore(&ni->size_lock, flags);
1721        /*
1722         * This needs to be last since the address space operations ->readpage
1723         * and ->writepage can run concurrently with us as they are not
1724         * serialized on i_mutex.  Note, we are not allowed to fail once we flip
1725         * this switch, which is another reason to do this last.
1726         */
1727        NInoSetNonResident(ni);
1728        /* Mark the mft record dirty, so it gets written back. */
1729        flush_dcache_mft_record_page(ctx->ntfs_ino);
1730        mark_mft_record_dirty(ctx->ntfs_ino);
1731        ntfs_attr_put_search_ctx(ctx);
1732        unmap_mft_record(base_ni);
1733        up_write(&ni->runlist.lock);
1734        if (page) {
1735                set_page_dirty(page);
1736                unlock_page(page);
1737                put_page(page);
1738        }
1739        ntfs_debug("Done.");
1740        return 0;
1741undo_err_out:
1742        /* Convert the attribute back into a resident attribute. */
1743        a->non_resident = 0;
1744        /* Move the attribute name if it exists and update the offset. */
1745        name_ofs = (offsetof(ATTR_RECORD, data.resident.reserved) +
1746                        sizeof(a->data.resident.reserved) + 7) & ~7;
1747        if (a->name_length)
1748                memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1749                                a->name_length * sizeof(ntfschar));
            /* From here on @mp_ofs is the offset of the resident value. */
1750        mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1751        a->name_offset = cpu_to_le16(name_ofs);
1752        arec_size = (mp_ofs + attr_size + 7) & ~7;
1753        /* Resize the resident part of the attribute record. */
1754        err2 = ntfs_attr_record_resize(m, a, arec_size);
1755        if (unlikely(err2)) {
1756                /*
1757                 * This cannot happen (well if memory corruption is at work it
1758                 * could happen in theory), but deal with it as well as we can.
1759                 * If the old size is too small, truncate the attribute,
1760                 * otherwise simply give it a larger allocated size.
1761                 * FIXME: Should check whether chkdsk complains when the
1762                 * allocated size is much bigger than the resident value size.
1763                 */
1764                arec_size = le32_to_cpu(a->length);
1765                if ((mp_ofs + attr_size) > arec_size) {
                            /*
                             * @err2 is reused to remember the value size
                             * before truncation for the message below.
                             */
1766                        err2 = attr_size;
1767                        attr_size = arec_size - mp_ofs;
1768                        ntfs_error(vol->sb, "Failed to undo partial resident "
1769                                        "to non-resident attribute "
1770                                        "conversion.  Truncating inode 0x%lx, "
1771                                        "attribute type 0x%x from %i bytes to "
1772                                        "%i bytes to maintain metadata "
1773                                        "consistency.  THIS MEANS YOU ARE "
1774                                        "LOSING %i BYTES DATA FROM THIS %s.",
1775                                        vi->i_ino,
1776                                        (unsigned)le32_to_cpu(ni->type),
1777                                        err2, attr_size, err2 - attr_size,
1778                                        ((ni->type == AT_DATA) &&
1779                                        !ni->name_len) ? "FILE": "ATTRIBUTE");
1780                        write_lock_irqsave(&ni->size_lock, flags);
1781                        ni->initialized_size = attr_size;
1782                        i_size_write(vi, attr_size);
1783                        write_unlock_irqrestore(&ni->size_lock, flags);
1784                }
1785        }
1786        /* Setup the fields specific to resident attributes. */
1787        a->data.resident.value_length = cpu_to_le32(attr_size);
1788        a->data.resident.value_offset = cpu_to_le16(mp_ofs);
1789        a->data.resident.flags = old_res_attr_flags;
1790        memset(&a->data.resident.reserved, 0,
1791                        sizeof(a->data.resident.reserved));
1792        /* Copy the data from the page back to the attribute value. */
1793        if (page) {
1794                kaddr = kmap_atomic(page);
1795                memcpy((u8*)a + mp_ofs, kaddr, attr_size);
1796                kunmap_atomic(kaddr);
1797        }
1798        /* Setup the allocated size in the ntfs inode in case it changed. */
1799        write_lock_irqsave(&ni->size_lock, flags);
1800        ni->allocated_size = arec_size - mp_ofs;
1801        write_unlock_irqrestore(&ni->size_lock, flags);
1802        /* Mark the mft record dirty, so it gets written back. */
1803        flush_dcache_mft_record_page(ctx->ntfs_ino);
1804        mark_mft_record_dirty(ctx->ntfs_ino);
1805err_out:
            /* Release the search context and mft record if we got that far. */
1806        if (ctx)
1807                ntfs_attr_put_search_ctx(ctx);
1808        if (m)
1809                unmap_mft_record(base_ni);
1810        ni->runlist.rl = NULL;
1811        up_write(&ni->runlist.lock);
1812rl_err_out:
            /* Give back the clusters allocated for the attribute value. */
1813        if (rl) {
1814                if (ntfs_cluster_free_from_rl(vol, rl) < 0) {
1815                        ntfs_error(vol->sb, "Failed to release allocated "
1816                                        "cluster(s) in error code path.  Run "
1817                                        "chkdsk to recover the lost "
1818                                        "cluster(s).");
1819                        NVolSetErrors(vol);
1820                }
1821                ntfs_free(rl);
                    /*
                     * This label sits inside the if (rl) body: the cluster
                     * allocation failure path jumps straight here with the
                     * page held, skipping the runlist release above.
                     */
1822page_err_out:
1823                unlock_page(page);
1824                put_page(page);
1825        }
            /* -EINVAL is reported to the caller as -EIO. */
1826        if (err == -EINVAL)
1827                err = -EIO;
1828        return err;
1829}
1830
1831/**
1832 * ntfs_attr_extend_allocation - extend the allocated space of an attribute
1833 * @ni:                 ntfs inode of the attribute whose allocation to extend
1834 * @new_alloc_size:     new size in bytes to which to extend the allocation to
1835 * @new_data_size:      new size in bytes to which to extend the data to
1836 * @data_start:         beginning of region which is required to be non-sparse
1837 *
1838 * Extend the allocated space of an attribute described by the ntfs inode @ni
1839 * to @new_alloc_size bytes.  If @data_start is -1, the whole extension may be
1840 * implemented as a hole in the file (as long as both the volume and the ntfs
1841 * inode @ni have sparse support enabled).  If @data_start is >= 0, then the
1842 * region between the old allocated size and @data_start - 1 may be made sparse
1843 * but the regions between @data_start and @new_alloc_size must be backed by
1844 * actual clusters.
1845 *
1846 * If @new_data_size is -1, it is ignored.  If it is >= 0, then the data size
1847 * of the attribute is extended to @new_data_size.  Note that the i_size of the
1848 * vfs inode is not updated.  Only the data size in the base attribute record
1849 * is updated.  The caller has to update i_size separately if this is required.
1850 * WARNING: It is a BUG() for @new_data_size to be smaller than the old data
1851 * size as well as for @new_data_size to be greater than @new_alloc_size.
1852 *
1853 * For resident attributes this involves resizing the attribute record and if
1854 * necessary moving it and/or other attributes into extent mft records and/or
1855 * converting the attribute to a non-resident attribute which in turn involves
1856 * extending the allocation of a non-resident attribute as described below.
1857 *
1858 * For non-resident attributes this involves allocating clusters in the data
1859 * zone on the volume (except for regions that are being made sparse) and
1860 * extending the run list to describe the allocated clusters as well as
1861 * updating the mapping pairs array of the attribute.  This in turn involves
1862 * resizing the attribute record and if necessary moving it and/or other
1863 * attributes into extent mft records and/or splitting the attribute record
1864 * into multiple extent attribute records.
1865 *
1866 * Also, the attribute list attribute is updated if present and in some of the
1867 * above cases (the ones where extent mft records/attributes come into play),
1868 * an attribute list attribute is created if not already present.
1869 *
1870 * Return the new allocated size on success and -errno on error.  In the case
1871 * that an error is encountered but a partial extension at least up to
1872 * @data_start (if present) is possible, the allocation is partially extended
1873 * and this is returned.  This means the caller must check the returned size to
1874 * determine if the extension was partial.  If @data_start is -1 then partial
1875 * allocations are not performed.
1876 *
1877 * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA.
1878 *
1879 * Locking: This function takes the runlist lock of @ni for writing as well as
1880 * locking the mft record of the base ntfs inode.  These locks are maintained
1881 * throughout execution of the function.  These locks are required so that the
1882 * attribute can be resized safely and so that it can for example be converted
1883 * from resident to non-resident safely.
1884 *
1885 * TODO: At present attribute list attribute handling is not implemented.
1886 *
1887 * TODO: At present it is not safe to call this function for anything other
1888 * than the $DATA attribute(s) of an uncompressed and unencrypted file.
1889 */
1890s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
1891                const s64 new_data_size, const s64 data_start)
1892{
            /*
             * Roadmap of the code below:
             *   1. Round the sizes to cluster boundaries (non-resident only) and
             *      check the new size with ntfs_attr_size_bounds_check().
             *   2. Take the runlist lock for writing, map the base mft record and
             *      look up the attribute extent to work on.
             *   3. Resident attribute: try to grow the attribute record in place.
             *      If that is not possible, drop the locks, convert the attribute
             *      to non-resident and restart from retry_extend.
             *   4. Non-resident attribute: allocate clusters, merge them into the
             *      runlist, rebuild the mapping pairs array and update the sizes.
             * On success @new_alloc_size is returned (after any adjustment made
             * below); on failure a negative error code is returned.
             *
             * Notes on the locals:
             *   @start          working copy of @data_start; rounded down to a
             *                   cluster boundary for non-resident attributes.
             *   @allocated_size snapshot of ni->allocated_size taken under
             *                   ni->size_lock.
             *   @attr_len       resident path: the resident value length;
             *                   non-resident path: the attribute record length
             *                   before it is resized (used by undo_alloc).
             *   @rl2            first the runlist returned by ntfs_cluster_alloc(),
             *                   later the runlist element at which this attribute
             *                   extent starts.
             *   @mp_rebuilt     true once the attribute record has been resized
             *                   for the new mapping pairs array, i.e. undo_alloc
             *                   has to restore the record.
             */
1893        VCN vcn;
1894        s64 ll, allocated_size, start = data_start;
1895        struct inode *vi = VFS_I(ni);
1896        ntfs_volume *vol = ni->vol;
1897        ntfs_inode *base_ni;
1898        MFT_RECORD *m;
1899        ATTR_RECORD *a;
1900        ntfs_attr_search_ctx *ctx;
1901        runlist_element *rl, *rl2;
1902        unsigned long flags;
1903        int err, mp_size;
1904        u32 attr_len = 0; /* Silence stupid gcc warning. */
1905        bool mp_rebuilt;
1906
1907#ifdef DEBUG
            /*
             * The allocated size is sampled here only for the debug message; it
             * is read again under the lock before it is actually used.
             */
1908        read_lock_irqsave(&ni->size_lock, flags);
1909        allocated_size = ni->allocated_size;
1910        read_unlock_irqrestore(&ni->size_lock, flags);
1911        ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
1912                        "old_allocated_size 0x%llx, "
1913                        "new_allocated_size 0x%llx, new_data_size 0x%llx, "
1914                        "data_start 0x%llx.", vi->i_ino,
1915                        (unsigned)le32_to_cpu(ni->type),
1916                        (unsigned long long)allocated_size,
1917                        (unsigned long long)new_alloc_size,
1918                        (unsigned long long)new_data_size,
1919                        (unsigned long long)start);
1920#endif
            /*
             * Restart point: jumped to (with no locks held) after the attribute
             * has been converted from resident to non-resident below.
             */
1921retry_extend:
1922        /*
1923         * For non-resident attributes, @start and @new_alloc_size need to be
             * aligned to cluster boundaries for allocation purposes.  @start is
             * rounded down (only if positive) and @new_alloc_size is rounded up.
1925         */
1926        if (NInoNonResident(ni)) {
1927                if (start > 0)
1928                        start &= ~(s64)vol->cluster_size_mask;
1929                new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
1930                                ~(s64)vol->cluster_size_mask;
1931        }
            /* A non-negative @new_data_size must fit in the new allocation. */
1932        BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size);
1933        /* Check if new size is allowed in $AttrDef. */
1934        err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
1935        if (unlikely(err)) {
1936                /* Only emit errors when the write will fail completely. */
1937                read_lock_irqsave(&ni->size_lock, flags);
1938                allocated_size = ni->allocated_size;
1939                read_unlock_irqrestore(&ni->size_lock, flags);
1940                if (start < 0 || start >= allocated_size) {
1941                        if (err == -ERANGE) {
1942                                ntfs_error(vol->sb, "Cannot extend allocation "
1943                                                "of inode 0x%lx, attribute "
1944                                                "type 0x%x, because the new "
1945                                                "allocation would exceed the "
1946                                                "maximum allowed size for "
1947                                                "this attribute type.",
1948                                                vi->i_ino, (unsigned)
1949                                                le32_to_cpu(ni->type));
1950                        } else {
1951                                ntfs_error(vol->sb, "Cannot extend allocation "
1952                                                "of inode 0x%lx, attribute "
1953                                                "type 0x%x, because this "
1954                                                "attribute type is not "
1955                                                "defined on the NTFS volume.  "
1956                                                "Possible corruption!  You "
1957                                                "should run chkdsk!",
1958                                                vi->i_ino, (unsigned)
1959                                                le32_to_cpu(ni->type));
1960                        }
1961                }
1962                /* Translate error code to be POSIX conformant for write(2). */
1963                if (err == -ERANGE)
1964                        err = -EFBIG;
1965                else
1966                        err = -EIO;
                    /* No locks have been taken yet so we can return directly. */
1967                return err;
1968        }
            /* For an attribute inode the mft record belongs to its base inode. */
1969        if (!NInoAttr(ni))
1970                base_ni = ni;
1971        else
1972                base_ni = ni->ext.base_ntfs_ino;
1973        /*
1974         * We will be modifying both the runlist (if non-resident) and the mft
1975         * record so lock them both down.
1976         */
1977        down_write(&ni->runlist.lock);
1978        m = map_mft_record(base_ni);
1979        if (IS_ERR(m)) {
1980                err = PTR_ERR(m);
                    /* Tell err_out there is nothing to unmap or put. */
1981                m = NULL;
1982                ctx = NULL;
1983                goto err_out;
1984        }
1985        ctx = ntfs_attr_get_search_ctx(base_ni, m);
1986        if (unlikely(!ctx)) {
1987                err = -ENOMEM;
1988                goto err_out;
1989        }
1990        read_lock_irqsave(&ni->size_lock, flags);
1991        allocated_size = ni->allocated_size;
1992        read_unlock_irqrestore(&ni->size_lock, flags);
1993        /*
1994         * If non-resident, seek to the last extent.  If resident, there is
1995         * only one extent, so seek to that.
1996         */
1997        vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits :
1998                        0;
1999        /*
2000         * Abort if someone did the work whilst we waited for the locks.  If we
2001         * just converted the attribute from resident to non-resident it is
2002         * likely that exactly this has happened already.  We cannot quite
2003         * abort if we need to update the data size.
2004         */
2005        if (unlikely(new_alloc_size <= allocated_size)) {
2006                ntfs_debug("Allocated size already exceeds requested size.");
                    /* The current allocated size is what gets returned. */
2007                new_alloc_size = allocated_size;
2008                if (new_data_size < 0)
2009                        goto done;
2010                /*
2011                 * We want the first attribute extent so that we can update the
2012                 * data size.
2013                 */
2014                vcn = 0;
2015        }
2016        err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2017                        CASE_SENSITIVE, vcn, NULL, 0, ctx);
2018        if (unlikely(err)) {
                    /* The attribute has to exist so not finding it is an i/o error. */
2019                if (err == -ENOENT)
2020                        err = -EIO;
2021                goto err_out;
2022        }
2023        m = ctx->mrec;
2024        a = ctx->attr;
2025        /* Use goto to reduce indentation. */
2026        if (a->non_resident)
2027                goto do_non_resident_extend;
2028        BUG_ON(NInoNonResident(ni));
2029        /* The total length of the attribute value. */
2030        attr_len = le32_to_cpu(a->data.resident.value_length);
2031        /*
2032         * Extend the attribute record to be able to store the new attribute
2033         * size.  ntfs_attr_record_resize() will not do anything if the size is
2034         * not changing.
2035         */
2036        if (new_alloc_size < vol->mft_record_size &&
2037                        !ntfs_attr_record_resize(m, a,
2038                        le16_to_cpu(a->data.resident.value_offset) +
2039                        new_alloc_size)) {
2040                /* The resize succeeded! */
2041                write_lock_irqsave(&ni->size_lock, flags);
                    /*
                     * The allocated size is taken from the resized record, i.e. it
                     * is the record length less the offset to the value.
                     */
2042                ni->allocated_size = le32_to_cpu(a->length) -
2043                                le16_to_cpu(a->data.resident.value_offset);
2044                write_unlock_irqrestore(&ni->size_lock, flags);
2045                if (new_data_size >= 0) {
                            /* This function only ever grows the data size. */
2046                        BUG_ON(new_data_size < attr_len);
2047                        a->data.resident.value_length =
2048                                        cpu_to_le32((u32)new_data_size);
2049                }
2050                goto flush_done;
2051        }
2052        /*
2053         * We have to drop all the locks so we can call
2054         * ntfs_attr_make_non_resident().  This could be optimised by try-
2055         * locking the first page cache page and only if that fails dropping
2056         * the locks, locking the page, and redoing all the locking and
2057         * lookups.  While this would be a huge optimisation, it is not worth
2058         * it as this is definitely a slow code path.
2059         */
2060        ntfs_attr_put_search_ctx(ctx);
2061        unmap_mft_record(base_ni);
2062        up_write(&ni->runlist.lock);
2063        /*
2064         * Not enough space in the mft record, try to make the attribute
2065         * non-resident and if successful restart the extension process.
2066         */
2067        err = ntfs_attr_make_non_resident(ni, attr_len);
2068        if (likely(!err))
2069                goto retry_extend;
2070        /*
2071         * Could not make non-resident.  If this is due to this not being
2072         * permitted for this attribute type or there not being enough space,
2073         * try to make other attributes non-resident.  Otherwise fail.
2074         */
2075        if (unlikely(err != -EPERM && err != -ENOSPC)) {
2076                /* Only emit errors when the write will fail completely. */
2077                read_lock_irqsave(&ni->size_lock, flags);
2078                allocated_size = ni->allocated_size;
2079                read_unlock_irqrestore(&ni->size_lock, flags);
2080                if (start < 0 || start >= allocated_size)
2081                        ntfs_error(vol->sb, "Cannot extend allocation of "
2082                                        "inode 0x%lx, attribute type 0x%x, "
2083                                        "because the conversion from resident "
2084                                        "to non-resident attribute failed "
2085                                        "with error code %i.", vi->i_ino,
2086                                        (unsigned)le32_to_cpu(ni->type), err);
2087                if (err != -ENOMEM)
2088                        err = -EIO;
                    /* The locks were dropped above so skip the unlocking. */
2089                goto conv_err_out;
2090        }
2091        /* TODO: Not implemented from here, abort. */
2092        read_lock_irqsave(&ni->size_lock, flags);
2093        allocated_size = ni->allocated_size;
2094        read_unlock_irqrestore(&ni->size_lock, flags);
2095        if (start < 0 || start >= allocated_size) {
2096                if (err == -ENOSPC)
2097                        ntfs_error(vol->sb, "Not enough space in the mft "
2098                                        "record/on disk for the non-resident "
2099                                        "attribute value.  This case is not "
2100                                        "implemented yet.");
2101                else /* if (err == -EPERM) */
2102                        ntfs_error(vol->sb, "This attribute type may not be "
2103                                        "non-resident.  This case is not "
2104                                        "implemented yet.");
2105        }
2106        err = -EOPNOTSUPP;
2107        goto conv_err_out;
            /*
             * The block below is compiled out.  It is a sketch of the missing
             * functionality and refers to a do_resident_extend label which does
             * not exist in this function.
             */
2108#if 0
2109        // TODO: Attempt to make other attributes non-resident.
2110        if (!err)
2111                goto do_resident_extend;
2112        /*
2113         * Both the attribute list attribute and the standard information
2114         * attribute must remain in the base inode.  Thus, if this is one of
2115         * these attributes, we have to try to move other attributes out into
2116         * extent mft records instead.
2117         */
2118        if (ni->type == AT_ATTRIBUTE_LIST ||
2119                        ni->type == AT_STANDARD_INFORMATION) {
2120                // TODO: Attempt to move other attributes into extent mft
2121                // records.
2122                err = -EOPNOTSUPP;
2123                if (!err)
2124                        goto do_resident_extend;
2125                goto err_out;
2126        }
2127        // TODO: Attempt to move this attribute to an extent mft record, but
2128        // only if it is not already the only attribute in an mft record in
2129        // which case there would be nothing to gain.
2130        err = -EOPNOTSUPP;
2131        if (!err)
2132                goto do_resident_extend;
2133        /* There is nothing we can do to make enough space. )-: */
2134        goto err_out;
2135#endif
2136do_non_resident_extend:
2137        BUG_ON(!NInoNonResident(ni));
            /*
             * The sizes can only be equal here if the "already big enough" case
             * above set @new_alloc_size to @allocated_size.  That case also reset
             * @vcn to zero so @a is the first extent and only the data size is
             * left to update.
             */
2138        if (new_alloc_size == allocated_size) {
2139                BUG_ON(vcn);
2140                goto alloc_done;
2141        }
2142        /*
2143         * If the data starts after the end of the old allocation, this is a
2144         * $DATA attribute and sparse attributes are enabled on the volume and
2145         * for this inode, then create a sparse region between the old
2146         * allocated size and the start of the data.  Otherwise simply proceed
2147         * with filling the whole space between the old allocated size and the
2148         * new allocated size with clusters.
             *
             * As hole creation is not implemented, both outcomes of the test
             * below end up at skip_sparse; the test only decides whether the debug
             * message is emitted.
2149         */
2150        if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA ||
2151                        !NVolSparseEnabled(vol) || NInoSparseDisabled(ni))
2152                goto skip_sparse;
2153        // TODO: This is not implemented yet.  We just fill in with real
2154        // clusters for now...
2155        ntfs_debug("Inserting holes is not-implemented yet.  Falling back to "
2156                        "allocating real clusters instead.");
2157skip_sparse:
2158        rl = ni->runlist.rl;
2159        if (likely(rl)) {
2160                /* Seek to the end of the runlist. */
2161                while (rl->length)
2162                        rl++;
2163        }
2164        /* If this attribute extent is not mapped, map it now. */
2165        if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED ||
2166                        (rl->lcn == LCN_ENOENT && rl > ni->runlist.rl &&
2167                        (rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
                    /*
                     * No runlist in memory and nothing allocated on disk: there is
                     * nothing to decompress so go straight to the allocation.  @rl
                     * is NULL in this case which makes the code at first_alloc
                     * pass -1 as the starting lcn.
                     */
2168                if (!rl && !allocated_size)
2169                        goto first_alloc;
2170                rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
2171                if (IS_ERR(rl)) {
2172                        err = PTR_ERR(rl);
2173                        if (start < 0 || start >= allocated_size)
2174                                ntfs_error(vol->sb, "Cannot extend allocation "
2175                                                "of inode 0x%lx, attribute "
2176                                                "type 0x%x, because the "
2177                                                "mapping of a runlist "
2178                                                "fragment failed with error "
2179                                                "code %i.", vi->i_ino,
2180                                                (unsigned)le32_to_cpu(ni->type),
2181                                                err);
2182                        if (err != -ENOMEM)
2183                                err = -EIO;
2184                        goto err_out;
2185                }
2186                ni->runlist.rl = rl;
2187                /* Seek to the end of the runlist. */
2188                while (rl->length)
2189                        rl++;
2190        }
2191        /*
2192         * We now know the runlist of the last extent is mapped and @rl is at
2193         * the end of the runlist.  We want to begin allocating clusters
2194         * starting at the last allocated cluster to reduce fragmentation.  If
2195         * there are no valid LCNs in the attribute we let the cluster
2196         * allocator choose the starting cluster.
2197         */
2198        /* If the last LCN is a hole or similar seek back to last real LCN. */
2199        while (rl->lcn < 0 && rl > ni->runlist.rl)
2200                rl--;
2201first_alloc:
2202        // FIXME: Need to implement partial allocations so at least part of the
2203        // write can be performed when start >= 0.  (Needed for POSIX write(2)
2204        // conformance.)
            /*
             * Ask for the clusters between the old and the new allocated size.
             * The starting lcn passed in is the cluster following the run @rl
             * points at or -1 if there is no run with a real lcn.
             */
2205        rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits,
2206                        (new_alloc_size - allocated_size) >>
2207                        vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ?
2208                        rl->lcn + rl->length : -1, DATA_ZONE, true);
2209        if (IS_ERR(rl2)) {
2210                err = PTR_ERR(rl2);
2211                if (start < 0 || start >= allocated_size)
2212                        ntfs_error(vol->sb, "Cannot extend allocation of "
2213                                        "inode 0x%lx, attribute type 0x%x, "
2214                                        "because the allocation of clusters "
2215                                        "failed with error code %i.", vi->i_ino,
2216                                        (unsigned)le32_to_cpu(ni->type), err);
2217                if (err != -ENOMEM && err != -ENOSPC)
2218                        err = -EIO;
2219                goto err_out;
2220        }
2221        rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
2222        if (IS_ERR(rl)) {
2223                err = PTR_ERR(rl);
2224                if (start < 0 || start >= allocated_size)
2225                        ntfs_error(vol->sb, "Cannot extend allocation of "
2226                                        "inode 0x%lx, attribute type 0x%x, "
2227                                        "because the runlist merge failed "
2228                                        "with error code %i.", vi->i_ino,
2229                                        (unsigned)le32_to_cpu(ni->type), err);
2230                if (err != -ENOMEM)
2231                        err = -EIO;
                    /*
                     * The new clusters never made it into the runlist of @ni so
                     * they are released directly from @rl2 and @rl2 is freed.
                     */
2232                if (ntfs_cluster_free_from_rl(vol, rl2)) {
2233                        ntfs_error(vol->sb, "Failed to release allocated "
2234                                        "cluster(s) in error code path.  Run "
2235                                        "chkdsk to recover the lost "
2236                                        "cluster(s).");
2237                        NVolSetErrors(vol);
2238                }
2239                ntfs_free(rl2);
2240                goto err_out;
2241        }
2242        ni->runlist.rl = rl;
2243        ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size -
2244                        allocated_size) >> vol->cluster_size_bits);
2245        /* Find the runlist element with which the attribute extent starts. */
2246        ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
2247        rl2 = ntfs_rl_find_vcn_nolock(rl, ll);
2248        BUG_ON(!rl2);
2249        BUG_ON(!rl2->length);
2250        BUG_ON(rl2->lcn < LCN_HOLE);
            /*
             * From here on errors go via undo_alloc (or restore_undo_alloc) as
             * the new clusters are now part of the runlist of @ni.
             */
2251        mp_rebuilt = false;
2252        /* Get the size for the new mapping pairs array for this extent. */
2253        mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
2254        if (unlikely(mp_size <= 0)) {
                    /* @err is set to @mp_size just for the error message. */
2255                err = mp_size;
2256                if (start < 0 || start >= allocated_size)
2257                        ntfs_error(vol->sb, "Cannot extend allocation of "
2258                                        "inode 0x%lx, attribute type 0x%x, "
2259                                        "because determining the size for the "
2260                                        "mapping pairs failed with error code "
2261                                        "%i.", vi->i_ino,
2262                                        (unsigned)le32_to_cpu(ni->type), err);
2263                err = -EIO;
2264                goto undo_alloc;
2265        }
2266        /* Extend the attribute record to fit the bigger mapping pairs array. */
            /* Save the old record length so undo_alloc can shrink it back. */
2267        attr_len = le32_to_cpu(a->length);
2268        err = ntfs_attr_record_resize(m, a, mp_size +
2269                        le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
2270        if (unlikely(err)) {
2271                BUG_ON(err != -ENOSPC);
2272                // TODO: Deal with this by moving this extent to a new mft
2273                // record or by starting a new extent in a new mft record,
2274                // possibly by extending this extent partially and filling it
2275                // and creating a new extent for the remainder, or by making
2276                // other attributes non-resident and/or by moving other
2277                // attributes out of this mft record.
2278                if (start < 0 || start >= allocated_size)
2279                        ntfs_error(vol->sb, "Not enough space in the mft "
2280                                        "record for the extended attribute "
2281                                        "record.  This case is not "
2282                                        "implemented yet.");
2283                err = -EOPNOTSUPP;
2284                goto undo_alloc;
2285        }
2286        mp_rebuilt = true;
2287        /* Generate the mapping pairs array directly into the attr record. */
2288        err = ntfs_mapping_pairs_build(vol, (u8*)a +
2289                        le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
2290                        mp_size, rl2, ll, -1, NULL);
2291        if (unlikely(err)) {
2292                if (start < 0 || start >= allocated_size)
2293                        ntfs_error(vol->sb, "Cannot extend allocation of "
2294                                        "inode 0x%lx, attribute type 0x%x, "
2295                                        "because building the mapping pairs "
2296                                        "failed with error code %i.", vi->i_ino,
2297                                        (unsigned)le32_to_cpu(ni->type), err);
2298                err = -EIO;
2299                goto undo_alloc;
2300        }
2301        /* Update the highest_vcn. */
2302        a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
2303                        vol->cluster_size_bits) - 1);
2304        /*
2305         * We now have extended the allocated size of the attribute.  Reflect
2306         * this in the ntfs_inode structure and the attribute record.
2307         */
2308        if (a->data.non_resident.lowest_vcn) {
2309                /*
2310                 * We are not in the first attribute extent, switch to it, but
2311                 * first ensure the changes will make it to disk later.
2312                 */
2313                flush_dcache_mft_record_page(ctx->ntfs_ino);
2314                mark_mft_record_dirty(ctx->ntfs_ino);
2315                ntfs_attr_reinit_search_ctx(ctx);
2316                err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2317                                CASE_SENSITIVE, 0, NULL, 0, ctx);
2318                if (unlikely(err))
2319                        goto restore_undo_alloc;
2320                /* @m is not used any more so no need to set it. */
2321                a = ctx->attr;
2322        }
            /* The in-memory and on-disk sizes are updated under the size lock. */
2323        write_lock_irqsave(&ni->size_lock, flags);
2324        ni->allocated_size = new_alloc_size;
2325        a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
2326        /*
2327         * FIXME: This would fail if @ni is a directory, $MFT, or an index,
2328         * since those can have sparse/compressed set.  For example can be
2329         * set compressed even though it is not compressed itself and in that
2330         * case the bit means that files are to be created compressed in the
2331         * directory...  At present this is ok as this code is only called for
2332         * regular files, and only for their $DATA attribute(s).
2333         * FIXME: The calculation is wrong if we created a hole above.  For now
2334         * it does not matter as we never create holes.
2335         */
2336        if (NInoSparse(ni) || NInoCompressed(ni)) {
2337                ni->itype.compressed.size += new_alloc_size - allocated_size;
2338                a->data.non_resident.compressed_size =
2339                                cpu_to_sle64(ni->itype.compressed.size);
                    /* i_blocks is the size in bytes shifted right by 9 bits. */
2340                vi->i_blocks = ni->itype.compressed.size >> 9;
2341        } else
2342                vi->i_blocks = new_alloc_size >> 9;
2343        write_unlock_irqrestore(&ni->size_lock, flags);
            /*
             * Non-resident attribute: the allocation is in place and @a is the
             * first attribute extent.  Update the data size if requested.
             */
2344alloc_done:
2345        if (new_data_size >= 0) {
2346                BUG_ON(new_data_size <
2347                                sle64_to_cpu(a->data.non_resident.data_size));
2348                a->data.non_resident.data_size = cpu_to_sle64(new_data_size);
2349        }
            /* The resident path joins here once the record has been updated. */
2350flush_done:
2351        /* Ensure the changes make it to disk. */
2352        flush_dcache_mft_record_page(ctx->ntfs_ino);
2353        mark_mft_record_dirty(ctx->ntfs_ino);
            /* Success: release everything and return the new allocated size. */
2354done:
2355        ntfs_attr_put_search_ctx(ctx);
2356        unmap_mft_record(base_ni);
2357        up_write(&ni->runlist.lock);
2358        ntfs_debug("Done, new_allocated_size 0x%llx.",
2359                        (unsigned long long)new_alloc_size);
2360        return new_alloc_size;
            /*
             * The last extent has been updated already but the lookup of the first
             * extent failed.  Look the last extent up again so its highest_vcn can
             * be reset before the allocation is undone.
             */
2361restore_undo_alloc:
2362        if (start < 0 || start >= allocated_size)
2363                ntfs_error(vol->sb, "Cannot complete extension of allocation "
2364                                "of inode 0x%lx, attribute type 0x%x, because "
2365                                "lookup of first attribute extent failed with "
2366                                "error code %i.", vi->i_ino,
2367                                (unsigned)le32_to_cpu(ni->type), err);
2368        if (err == -ENOENT)
2369                err = -EIO;
2370        ntfs_attr_reinit_search_ctx(ctx);
2371        if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
2372                        allocated_size >> vol->cluster_size_bits, NULL, 0,
2373                        ctx)) {
2374                ntfs_error(vol->sb, "Failed to find last attribute extent of "
2375                                "attribute in error code path.  Run chkdsk to "
2376                                "recover.");
                    /*
                     * Cannot undo, so keep the new clusters and make the in-memory
                     * sizes reflect the extended allocation.
                     */
2377                write_lock_irqsave(&ni->size_lock, flags);
2378                ni->allocated_size = new_alloc_size;
2379                /*
2380                 * FIXME: This would fail if @ni is a directory...  See above.
2381                 * FIXME: The calculation is wrong if we created a hole above.
2382                 * For now it does not matter as we never create holes.
2383                 */
2384                if (NInoSparse(ni) || NInoCompressed(ni)) {
2385                        ni->itype.compressed.size += new_alloc_size -
2386                                        allocated_size;
2387                        vi->i_blocks = ni->itype.compressed.size >> 9;
2388                } else
2389                        vi->i_blocks = new_alloc_size >> 9;
2390                write_unlock_irqrestore(&ni->size_lock, flags);
2391                ntfs_attr_put_search_ctx(ctx);
2392                unmap_mft_record(base_ni);
2393                up_write(&ni->runlist.lock);
2394                /*
2395                 * The only thing that is now wrong is the allocated size of the
2396                 * base attribute extent which chkdsk should be able to fix.
2397                 */
2398                NVolSetErrors(vol);
2399                return err;
2400        }
2401        ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64(
2402                        (allocated_size >> vol->cluster_size_bits) - 1);
            /*
             * Free the clusters from the old end of the allocation onwards,
             * truncate the runlist back to the old size and, if the attribute
             * record was already resized, restore it.
             */
2403undo_alloc:
2404        ll = allocated_size >> vol->cluster_size_bits;
2405        if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) {
2406                ntfs_error(vol->sb, "Failed to release allocated cluster(s) "
2407                                "in error code path.  Run chkdsk to recover "
2408                                "the lost cluster(s).");
2409                NVolSetErrors(vol);
2410        }
            /* Reload the pointers from @ctx as the call above was given @ctx. */
2411        m = ctx->mrec;
2412        a = ctx->attr;
2413        /*
2414         * If the runlist truncation fails and/or the search context is no
2415         * longer valid, we cannot resize the attribute record or build the
2416         * mapping pairs array thus we mark the inode bad so that no access to
2417         * the freed clusters can happen.
2418         */
2419        if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) {
2420                ntfs_error(vol->sb, "Failed to %s in error code path.  Run "
2421                                "chkdsk to recover.", IS_ERR(m) ?
2422                                "restore attribute search context" :
2423                                "truncate attribute runlist");
2424                NVolSetErrors(vol);
2425        } else if (mp_rebuilt) {
                    /* Shrink the record back to its saved length (@attr_len). */
2426                if (ntfs_attr_record_resize(m, a, attr_len)) {
2427                        ntfs_error(vol->sb, "Failed to restore attribute "
2428                                        "record in error code path.  Run "
2429                                        "chkdsk to recover.");
2430                        NVolSetErrors(vol);
2431                } else /* if (success) */ {
                            /*
                             * NOTE(review): @ll was reassigned at undo_alloc to
                             * the old end VCN and @rl2 was looked up before the
                             * runlist was truncated above.  Confirm that these
                             * are the arguments ntfs_mapping_pairs_build() needs
                             * to regenerate the original mapping pairs array of
                             * this extent.
                             */
2432                        if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
2433                                        a->data.non_resident.
2434                                        mapping_pairs_offset), attr_len -
2435                                        le16_to_cpu(a->data.non_resident.
2436                                        mapping_pairs_offset), rl2, ll, -1,
2437                                        NULL)) {
2438                                ntfs_error(vol->sb, "Failed to restore "
2439                                                "mapping pairs array in error "
2440                                                "code path.  Run chkdsk to "
2441                                                "recover.");
2442                                NVolSetErrors(vol);
2443                        }
2444                        flush_dcache_mft_record_page(ctx->ntfs_ino);
2445                        mark_mft_record_dirty(ctx->ntfs_ino);
2446                }
2447        }
            /*
             * Common error exit with the runlist lock held.  @ctx and @m are NULL
             * if they were not obtained (see the start of the function).
             */
2448err_out:
2449        if (ctx)
2450                ntfs_attr_put_search_ctx(ctx);
2451        if (m)
2452                unmap_mft_record(base_ni);
2453        up_write(&ni->runlist.lock);
            /*
             * Error exit for the failed resident to non-resident conversion which
             * gets here with all the locks already dropped.
             */
2454conv_err_out:
2455        ntfs_debug("Failed.  Returning error code %i.", err);
2456        return err;
2457}
2458
2459/**
2460 * ntfs_attr_set - fill (a part of) an attribute with a byte
2461 * @ni:         ntfs inode describing the attribute to fill
2462 * @ofs:        offset inside the attribute at which to start to fill
2463 * @cnt:        number of bytes to fill
2464 * @val:        the unsigned 8-bit value with which to fill the attribute
2465 *
2466 * Fill @cnt bytes of the attribute described by the ntfs inode @ni starting at
2467 * byte offset @ofs inside the attribute with the constant byte @val.
2468 *
2469 * This function is effectively like memset() applied to an ntfs attribute.
2470 * Note this function actually only operates on the page cache pages belonging
2471 * to the ntfs attribute and it marks them dirty after doing the memset().
2472 * Thus it relies on the vm dirty page write code paths to cause the modified
2473 * pages to be written to the mft record/disk.
2474 *
2475 * Return 0 on success and -errno on error.  An error code of -ESPIPE means
2476 * that @ofs + @cnt were outside the end of the attribute and no write was
2477 * performed.
2478 */
2479int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2480{
2481        ntfs_volume *vol = ni->vol;
2482        struct address_space *mapping;
2483        struct page *page;
2484        u8 *kaddr;
2485        pgoff_t idx, end;
2486        unsigned start_ofs, end_ofs, size;
2487
2488        ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.",
2489                        (long long)ofs, (long long)cnt, val);
2490        BUG_ON(ofs < 0);
2491        BUG_ON(cnt < 0);
2492        if (!cnt)
2493                goto done;
2494        /*
2495         * FIXME: Compressed and encrypted attributes are not supported when
2496         * writing and we should never have gotten here for them.
2497         */
2498        BUG_ON(NInoCompressed(ni));
2499        BUG_ON(NInoEncrypted(ni));
2500        mapping = VFS_I(ni)->i_mapping;
2501        /* Work out the starting index and page offset. */
2502        idx = ofs >> PAGE_SHIFT;
2503        start_ofs = ofs & ~PAGE_MASK;
2504        /* Work out the ending index and page offset. */
2505        end = ofs + cnt;
2506        end_ofs = end & ~PAGE_MASK;
2507        /* If the end is outside the inode size return -ESPIPE. */
2508        if (unlikely(end > i_size_read(VFS_I(ni)))) {
2509                ntfs_error(vol->sb, "Request exceeds end of attribute.");
2510                return -ESPIPE;
2511        }
2512        end >>= PAGE_SHIFT;
2513        /* If there is a first partial page, need to do it the slow way. */
2514        if (start_ofs) {
2515                page = read_mapping_page(mapping, idx, NULL);
2516                if (IS_ERR(page)) {
2517                        ntfs_error(vol->sb, "Failed to read first partial "
2518                                        "page (error, index 0x%lx).", idx);
2519                        return PTR_ERR(page);
2520                }
2521                /*
2522                 * If the last page is the same as the first page, need to
2523                 * limit the write to the end offset.
2524                 */
2525                size = PAGE_SIZE;
2526                if (idx == end)
2527                        size = end_ofs;
2528                kaddr = kmap_atomic(page);
2529                memset(kaddr + start_ofs, val, size - start_ofs);
2530                flush_dcache_page(page);
2531                kunmap_atomic(kaddr);
2532                set_page_dirty(page);
2533                put_page(page);
2534                balance_dirty_pages_ratelimited(mapping);
2535                cond_resched();
2536                if (idx == end)
2537                        goto done;
2538                idx++;
2539        }
2540        /* Do the whole pages the fast way. */
2541        for (; idx < end; idx++) {
2542                /* Find or create the current page.  (The page is locked.) */
2543                page = grab_cache_page(mapping, idx);
2544                if (unlikely(!page)) {
2545                        ntfs_error(vol->sb, "Insufficient memory to grab "
2546                                        "page (index 0x%lx).", idx);
2547                        return -ENOMEM;
2548                }
2549                kaddr = kmap_atomic(page);
2550                memset(kaddr, val, PAGE_SIZE);
2551                flush_dcache_page(page);
2552                kunmap_atomic(kaddr);
2553                /*
2554                 * If the page has buffers, mark them uptodate since buffer
2555                 * state and not page state is definitive in 2.6 kernels.
2556                 */
2557                if (page_has_buffers(page)) {
2558                        struct buffer_head *bh, *head;
2559
2560                        bh = head = page_buffers(page);
2561                        do {
2562                                set_buffer_uptodate(bh);
2563                        } while ((bh = bh->b_this_page) != head);
2564                }
2565                /* Now that buffers are uptodate, set the page uptodate, too. */
2566                SetPageUptodate(page);
2567                /*
2568                 * Set the page and all its buffers dirty and mark the inode
2569                 * dirty, too.  The VM will write the page later on.
2570                 */
2571                set_page_dirty(page);
2572                /* Finally unlock and release the page. */
2573                unlock_page(page);
2574                put_page(page);
2575                balance_dirty_pages_ratelimited(mapping);
2576                cond_resched();
2577        }
2578        /* If there is a last partial page, need to do it the slow way. */
2579        if (end_ofs) {
2580                page = read_mapping_page(mapping, idx, NULL);
2581                if (IS_ERR(page)) {
2582                        ntfs_error(vol->sb, "Failed to read last partial page "
2583                                        "(error, index 0x%lx).", idx);
2584                        return PTR_ERR(page);
2585                }
2586                kaddr = kmap_atomic(page);
2587                memset(kaddr, val, end_ofs);
2588                flush_dcache_page(page);
2589                kunmap_atomic(kaddr);
2590                set_page_dirty(page);
2591                put_page(page);
2592                balance_dirty_pages_ratelimited(mapping);
2593                cond_resched();
2594        }
2595done:
2596        ntfs_debug("Done.");
2597        return 0;
2598}
2599
2600#endif /* NTFS_RW */
2601