linux/fs/ocfs2/extent_map.c
// SPDX-License-Identifier: GPL-2.0-only
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */

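/*
 * Start an inode with an empty extent map: no cached items and an
 * empty LRU list. Items are added later by
 * ocfs2_extent_map_insert_rec().
 */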
void ocfs2_extent_map_init(struct inode *inode)
{
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        oi->ip_extent_map.em_num_items = 0;
        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

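/*
 * Linear scan of the cached items for one covering cpos. On a hit,
 * the item is moved to the front of em_list so the list stays in
 * rough LRU order, and *ret_emi points at it; on a miss, *ret_emi is
 * NULL. The caller must hold ip_lock.
 */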
static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
                                      unsigned int cpos,
                                      struct ocfs2_extent_map_item **ret_emi)
{
        unsigned int range;
        struct ocfs2_extent_map_item *emi;

        *ret_emi = NULL;

        list_for_each_entry(emi, &em->em_list, ei_list) {
                range = emi->ei_cpos + emi->ei_clusters;

                if (cpos >= emi->ei_cpos && cpos < range) {
                        list_move(&emi->ei_list, &em->em_list);

                        *ret_emi = emi;
                        break;
                }
        }
}

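/*
 * Cached cluster lookup. Returns 0 and fills *phys (and optionally
 * *len and *flags) if the extent map has an item covering cpos;
 * returns -ENOENT on a cache miss.
 */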
static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
                                   unsigned int *phys, unsigned int *len,
                                   unsigned int *flags)
{
        unsigned int coff;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_extent_map_item *emi;

        spin_lock(&oi->ip_lock);

        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
        if (emi) {
                coff = cpos - emi->ei_cpos;
                *phys = emi->ei_phys + coff;
                if (len)
                        *len = emi->ei_clusters - coff;
                if (flags)
                        *flags = emi->ei_flags;
        }

        spin_unlock(&oi->ip_lock);

        if (emi == NULL)
                return -ENOENT;

        return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
        struct ocfs2_extent_map_item *emi, *n;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_extent_map *em = &oi->ip_extent_map;
        LIST_HEAD(tmp_list);
        unsigned int range;

        spin_lock(&oi->ip_lock);
        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
                if (emi->ei_cpos >= cpos) {
                        /* Full truncate of this record. */
                        list_move(&emi->ei_list, &tmp_list);
                        BUG_ON(em->em_num_items == 0);
                        em->em_num_items--;
                        continue;
                }

                range = emi->ei_cpos + emi->ei_clusters;
                if (range > cpos) {
                        /* Partial truncate */
                        emi->ei_clusters = cpos - emi->ei_cpos;
                }
        }
        spin_unlock(&oi->ip_lock);

        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
                list_del(&emi->ei_list);
                kfree(emi);
        }
}

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
                                 struct ocfs2_extent_map_item *emi2)
{
        unsigned int range1, range2;

        /*
         * Check if the logical start of emi2 is inside emi1.
         */
        range1 = emi1->ei_cpos + emi1->ei_clusters;
        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
                return 1;

        /*
         * Check if the logical end of emi2 is inside emi1.
         */
        range2 = emi2->ei_cpos + emi2->ei_clusters;
        if (range2 > emi1->ei_cpos && range2 <= range1)
                return 1;

        return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
                                  struct ocfs2_extent_map_item *src)
{
        dest->ei_cpos = src->ei_cpos;
        dest->ei_phys = src->ei_phys;
        dest->ei_clusters = src->ei_clusters;
        dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if the merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
                                         struct ocfs2_extent_map_item *ins)
{
        /*
         * Handle contiguousness.
         */
        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
            ins->ei_flags == emi->ei_flags) {
                emi->ei_clusters += ins->ei_clusters;
                return 1;
        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
                   ins->ei_flags == emi->ei_flags) {
                emi->ei_phys = ins->ei_phys;
                emi->ei_cpos = ins->ei_cpos;
                emi->ei_clusters += ins->ei_clusters;
                return 1;
        }

        /*
         * Overlapping extents - this shouldn't happen unless we've
         * split an extent to change its flags. That is exceedingly
         * rare, so there's no sense in trying to optimize it yet.
         */
        if (ocfs2_ei_is_contained(emi, ins) ||
            ocfs2_ei_is_contained(ins, emi)) {
                ocfs2_copy_emi_fields(emi, ins);
                return 1;
        }

        /* No merge was possible. */
        return 0;
}

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
                                 struct ocfs2_extent_rec *rec)
{
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_extent_map *em = &oi->ip_extent_map;
        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
        struct ocfs2_extent_map_item ins;

        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
                                               le64_to_cpu(rec->e_blkno));
        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
        ins.ei_flags = rec->e_flags;

search:
        spin_lock(&oi->ip_lock);

        list_for_each_entry(emi, &em->em_list, ei_list) {
                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
                        list_move(&emi->ei_list, &em->em_list);
                        spin_unlock(&oi->ip_lock);
                        goto out;
                }
        }

        /*
         * No item could be merged.
         *
         * Either allocate and add a new item, or overwrite the least
         * recently inserted.
         */

        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
                if (new_emi == NULL) {
                        spin_unlock(&oi->ip_lock);

                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
                        if (new_emi == NULL)
                                goto out;

                        goto search;
                }

                ocfs2_copy_emi_fields(new_emi, &ins);
                list_add(&new_emi->ei_list, &em->em_list);
                em->em_num_items++;
                new_emi = NULL;
        } else {
                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
                emi = list_entry(em->em_list.prev,
                                 struct ocfs2_extent_map_item, ei_list);
                list_move(&emi->ei_list, &em->em_list);
                ocfs2_copy_emi_fields(emi, &ins);
        }

        spin_unlock(&oi->ip_lock);

out:
        kfree(new_emi);
}

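/*
 * Read the inode's last extent block and report whether it holds any
 * extents. Returns 1 if the leaf is empty (no records, or a single
 * empty record), 0 if it has data, and a negative errno on error.
 */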
static int ocfs2_last_eb_is_empty(struct inode *inode,
                                  struct ocfs2_dinode *di)
{
        int ret, next_free;
        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
        struct buffer_head *eb_bh = NULL;
        struct ocfs2_extent_block *eb;
        struct ocfs2_extent_list *el;

        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
        el = &eb->h_list;

        if (el->l_tree_depth) {
                ocfs2_error(inode->i_sb,
                            "Inode %lu has non zero tree depth in leaf block %llu\n",
                            inode->i_ino,
                            (unsigned long long)eb_bh->b_blocknr);
                ret = -EROFS;
                goto out;
        }

        next_free = le16_to_cpu(el->l_next_free_rec);

        if (next_free == 0 ||
            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
                ret = 1;

out:
        brelse(eb_bh);
        return ret;
}

/*
 * Return the first index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
                                       u32 v_cluster)
{
        int i;
        struct ocfs2_extent_rec *rec;

        for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
                rec = &el->l_recs[i];

                if (v_cluster < le32_to_cpu(rec->e_cpos))
                        break;
        }

        return i;
}

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * cluster offset (UINT_MAX) minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
                               struct ocfs2_extent_list *el,
                               struct buffer_head *eb_bh,
                               u32 v_cluster,
                               u32 *num_clusters)
{
        int ret, i;
        struct buffer_head *next_eb_bh = NULL;
        struct ocfs2_extent_block *eb, *next_eb;

        i = ocfs2_search_for_hole_index(el, v_cluster);

        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
                eb = (struct ocfs2_extent_block *)eb_bh->b_data;

                /*
                 * Check the next leaf for any extents.
                 */

                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
                        goto no_more_extents;

                ret = ocfs2_read_extent_block(ci,
                                              le64_to_cpu(eb->h_next_leaf_blk),
                                              &next_eb_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
                el = &next_eb->h_list;
                i = ocfs2_search_for_hole_index(el, v_cluster);
        }

no_more_extents:
        if (i == le16_to_cpu(el->l_next_free_rec)) {
                /*
                 * We're at the end of our existing allocation. Just
                 * return the maximum number of clusters we could
                 * possibly allocate.
                 */
                *num_clusters = UINT_MAX - v_cluster;
        } else {
                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
        }

        ret = 0;
out:
        brelse(next_eb_bh);
        return ret;
}

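/*
 * Walk the on-disk extent tree for v_cluster, bypassing the in-memory
 * extent map. On success the record covering v_cluster is copied to
 * *ret_rec; a hole is signalled by ret_rec->e_blkno == 0, with the
 * hole size returned through *hole_len if the caller asked for it.
 * *is_last, if non-NULL, is set when the record is the file's last
 * extent.
 */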
static int ocfs2_get_clusters_nocache(struct inode *inode,
                                      struct buffer_head *di_bh,
                                      u32 v_cluster, unsigned int *hole_len,
                                      struct ocfs2_extent_rec *ret_rec,
                                      unsigned int *is_last)
{
        int i, ret, tree_height, len;
        struct ocfs2_dinode *di;
        struct ocfs2_extent_block *uninitialized_var(eb);
        struct ocfs2_extent_list *el;
        struct ocfs2_extent_rec *rec;
        struct buffer_head *eb_bh = NULL;

        memset(ret_rec, 0, sizeof(*ret_rec));
        if (is_last)
                *is_last = 0;

        di = (struct ocfs2_dinode *) di_bh->b_data;
        el = &di->id2.i_list;
        tree_height = le16_to_cpu(el->l_tree_depth);

        if (tree_height > 0) {
                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
                                      &eb_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
                el = &eb->h_list;

                if (el->l_tree_depth) {
                        ocfs2_error(inode->i_sb,
                                    "Inode %lu has non zero tree depth in leaf block %llu\n",
                                    inode->i_ino,
                                    (unsigned long long)eb_bh->b_blocknr);
                        ret = -EROFS;
                        goto out;
                }
        }

        i = ocfs2_search_extent_list(el, v_cluster);
        if (i == -1) {
                /*
                 * Holes can be larger than the maximum size of an
                 * extent, so we return their lengths in a separate
                 * field.
                 */
                if (hole_len) {
                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
                                                         el, eb_bh,
                                                         v_cluster, &len);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        *hole_len = len;
                }
                goto out_hole;
        }

        rec = &el->l_recs[i];

        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

        if (!rec->e_blkno) {
                ocfs2_error(inode->i_sb,
                            "Inode %lu has bad extent record (%u, %u, 0)\n",
                            inode->i_ino,
                            le32_to_cpu(rec->e_cpos),
                            ocfs2_rec_clusters(el, rec));
                ret = -EROFS;
                goto out;
        }

        *ret_rec = *rec;

        /*
         * Checking for last extent is potentially expensive - we
         * might have to look at the next leaf over to see if it's
         * empty.
         *
         * The first two checks are to see whether the caller even
         * cares for this information, and if the extent is at least
         * the last in its list.
         *
         * If those hold true, then the extent is last if any of the
         * additional conditions hold true:
         *  - Extent list is in-inode
         *  - Extent list is right-most
         *  - Extent list is 2nd to rightmost, with empty right-most
         */
        if (is_last) {
                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
                        if (tree_height == 0)
                                *is_last = 1;
                        else if (eb->h_blkno == di->i_last_eb_blk)
                                *is_last = 1;
                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
                                ret = ocfs2_last_eb_is_empty(inode, di);
                                if (ret < 0) {
                                        mlog_errno(ret);
                                        goto out;
                                }
                                if (ret == 1)
                                        *is_last = 1;
                        }
                }
        }

out_hole:
        ret = 0;
out:
        brelse(eb_bh);
        return ret;
}

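/*
 * Translate an extent record into (p_cluster, num_clusters) relative
 * to v_cluster: the physical cluster backing v_cluster, and how many
 * clusters of the record remain from that point.
 */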
static void ocfs2_relative_extent_offsets(struct super_block *sb,
                                          u32 v_cluster,
                                          struct ocfs2_extent_rec *rec,
                                          u32 *p_cluster, u32 *num_clusters)
{
        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
        *p_cluster = *p_cluster + coff;

        if (num_clusters)
                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}

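/*
 * Like ocfs2_get_clusters(), but for an xattr value tree rooted at
 * el. Results are never cached in the inode's extent map, and an
 * unmapped cluster is reported as -EROFS rather than as a hole.
 */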
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
                             u32 *p_cluster, u32 *num_clusters,
                             struct ocfs2_extent_list *el,
                             unsigned int *extent_flags)
{
        int ret = 0, i;
        struct buffer_head *eb_bh = NULL;
        struct ocfs2_extent_block *eb;
        struct ocfs2_extent_rec *rec;
        u32 coff;

        if (el->l_tree_depth) {
                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
                                      &eb_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
                el = &eb->h_list;

                if (el->l_tree_depth) {
                        ocfs2_error(inode->i_sb,
                                    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
                                    inode->i_ino,
                                    (unsigned long long)eb_bh->b_blocknr);
                        ret = -EROFS;
                        goto out;
                }
        }

        i = ocfs2_search_extent_list(el, v_cluster);
        if (i == -1) {
                ret = -EROFS;
                mlog_errno(ret);
                goto out;
        } else {
                rec = &el->l_recs[i];
                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

                if (!rec->e_blkno) {
                        ocfs2_error(inode->i_sb,
                                    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
                                    inode->i_ino,
                                    le32_to_cpu(rec->e_cpos),
                                    ocfs2_rec_clusters(el, rec));
                        ret = -EROFS;
                        goto out;
                }
                coff = v_cluster - le32_to_cpu(rec->e_cpos);
                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
                                                    le64_to_cpu(rec->e_blkno));
                *p_cluster = *p_cluster + coff;
                if (num_clusters)
                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;

                if (extent_flags)
                        *extent_flags = rec->e_flags;
        }
out:
        brelse(eb_bh);
        return ret;
}

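/*
 * Map v_cluster to a physical cluster. The in-memory extent map is
 * consulted first; on a miss the on-disk tree is searched and the
 * result is inserted into the map for next time. A hole is reported
 * as *p_cluster == 0, with *num_clusters set to the hole's length.
 * Inline-data inodes have no cluster mappings, so -ERANGE is
 * returned for them.
 */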
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
                       u32 *p_cluster, u32 *num_clusters,
                       unsigned int *extent_flags)
{
        int ret;
        unsigned int uninitialized_var(hole_len), flags = 0;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                ret = -ERANGE;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
                                      num_clusters, extent_flags);
        if (ret == 0)
                goto out;

        ret = ocfs2_read_inode_block(inode, &di_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
                                         &rec, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        if (rec.e_blkno == 0ULL) {
                /*
                 * A hole was found. Return some canned values that
                 * callers can key on. If asked for, num_clusters will
                 * be populated with the size of the hole.
                 */
                *p_cluster = 0;
                if (num_clusters) {
                        *num_clusters = hole_len;
                }
        } else {
                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
                                              p_cluster, num_clusters);
                flags = rec.e_flags;

                ocfs2_extent_map_insert_rec(inode, &rec);
        }

        if (extent_flags)
                *extent_flags = flags;

out:
        brelse(di_bh);
        return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
                                u64 *ret_count, unsigned int *extent_flags)
{
        int ret;
        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
        u32 cpos, num_clusters, p_cluster;
        u64 boff = 0;

        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
                                 extent_flags);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * p_cluster == 0 indicates a hole.
         */
        if (p_cluster) {
                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
                boff += (v_blkno & (u64)(bpc - 1));
        }

        *p_blkno = boff;

        if (ret_count) {
                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
                *ret_count -= v_blkno & (u64)(bpc - 1);
        }

out:
        return ret;
}

/*
 * The name ocfs2_fiemap_inline() may be a little misleading: it
 * handles fiemap not only for inline-data files but also for fast
 * symlinks, since the two are identical as far as extent mapping is
 * concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
                               struct fiemap_extent_info *fieinfo,
                               u64 map_start)
{
        int ret;
        unsigned int id_count;
        struct ocfs2_dinode *di;
        u64 phys;
        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        di = (struct ocfs2_dinode *)di_bh->b_data;
        if (ocfs2_inode_is_fast_symlink(inode))
                id_count = ocfs2_fast_symlink_chars(inode->i_sb);
        else
                id_count = le16_to_cpu(di->id2.i_data.id_count);

        if (map_start < id_count) {
                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
                if (ocfs2_inode_is_fast_symlink(inode))
                        phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
                else
                        phys += offsetof(struct ocfs2_dinode,
                                         id2.i_data.id_data);

                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
                                              flags);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

#define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)

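/*
 * Fill fieinfo with the extents backing [map_start, map_start +
 * map_len). Runs under a shared inode lock and ip_alloc_sem, walking
 * the on-disk tree with ocfs2_get_clusters_nocache() and reporting
 * one fiemap extent per record; holes are simply skipped.
 */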
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 u64 map_start, u64 map_len)
{
        int ret, is_last;
        u32 mapping_end, cpos;
        unsigned int hole_size;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        u64 len_bytes, phys_bytes, virt_bytes;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;

        ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
        if (ret)
                return ret;

        ret = ocfs2_inode_lock(inode, &di_bh, 0);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        down_read(&OCFS2_I(inode)->ip_alloc_sem);

        /*
         * Handle inline-data and fast symlink separately.
         */
        if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
            ocfs2_inode_is_fast_symlink(inode)) {
                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
                goto out_unlock;
        }

        cpos = map_start >> osb->s_clustersize_bits;
        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
                                               map_start + map_len);
        is_last = 0;
        while (cpos < mapping_end && !is_last) {
                u32 fe_flags;

                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
                                                 &hole_size, &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
                        goto out_unlock;
                }

                if (rec.e_blkno == 0ULL) {
                        cpos += hole_size;
                        continue;
                }

                fe_flags = 0;
                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
                        fe_flags |= FIEMAP_EXTENT_SHARED;
                if (is_last)
                        fe_flags |= FIEMAP_EXTENT_LAST;
                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
                                              len_bytes, fe_flags);
                if (ret)
                        break;

                cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
        }

        if (ret > 0)
                ret = 0;

out_unlock:
        brelse(di_bh);

        up_read(&OCFS2_I(inode)->ip_alloc_sem);

        ocfs2_inode_unlock(inode, 0);
out:
        return ret;
}

/* Is IO overwriting allocated blocks? */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
                       u64 map_start, u64 map_len)
{
        int ret = 0, is_last;
        u32 mapping_end, cpos;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_extent_rec rec;

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
                        return ret;
                else
                        return -EAGAIN;
        }

        cpos = map_start >> osb->s_clustersize_bits;
        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
                                               map_start + map_len);
        is_last = 0;
        while (cpos < mapping_end && !is_last) {
                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
                                                 NULL, &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                if (rec.e_blkno == 0ULL)
                        break;

                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
                        break;

                cpos = le32_to_cpu(rec.e_cpos) +
                        le16_to_cpu(rec.e_leaf_clusters);
        }

        if (cpos < mapping_end)
                ret = -EAGAIN;
out:
        return ret;
}

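/*
 * Implement SEEK_DATA/SEEK_HOLE for llseek. Scans the extent tree
 * from *offset, treating unwritten extents as holes, and writes the
 * resulting file offset back through *offset. Returns -ENXIO if no
 * matching region exists before i_size.
 */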
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
        struct inode *inode = file->f_mapping->host;
        int ret;
        unsigned int is_last = 0, is_data = 0;
        u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
        u32 cpos, cend, clen, hole_size;
        u64 extoff, extlen;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;

        BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

        ret = ocfs2_inode_lock(inode, &di_bh, 0);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        down_read(&OCFS2_I(inode)->ip_alloc_sem);

        if (*offset >= i_size_read(inode)) {
                ret = -ENXIO;
                goto out_unlock;
        }

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                if (whence == SEEK_HOLE)
                        *offset = i_size_read(inode);
                goto out_unlock;
        }

        clen = 0;
        cpos = *offset >> cs_bits;
        cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

        while (cpos < cend && !is_last) {
                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
                                                 &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
                        goto out_unlock;
                }

                extoff = cpos;
                extoff <<= cs_bits;

                if (rec.e_blkno == 0ULL) {
                        clen = hole_size;
                        is_data = 0;
                } else {
                        clen = le16_to_cpu(rec.e_leaf_clusters) -
                                (cpos - le32_to_cpu(rec.e_cpos));
                        is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
                }

                if ((!is_data && whence == SEEK_HOLE) ||
                    (is_data && whence == SEEK_DATA)) {
                        if (extoff > *offset)
                                *offset = extoff;
                        goto out_unlock;
                }

                if (!is_last)
                        cpos += clen;
        }

        if (whence == SEEK_HOLE) {
                extoff = cpos;
                extoff <<= cs_bits;
                extlen = clen;
                extlen <<= cs_bits;

                if ((extoff + extlen) > i_size_read(inode))
                        extlen = i_size_read(inode) - extoff;
                extoff += extlen;
                if (extoff > *offset)
                        *offset = extoff;
                goto out_unlock;
        }

        ret = -ENXIO;

out_unlock:
        brelse(di_bh);

        up_read(&OCFS2_I(inode)->ip_alloc_sem);

        ocfs2_inode_unlock(inode, 0);
out:
        return ret;
}

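/*
 * Read nr blocks starting at virtual block v_block into bhs[],
 * mapping each contiguous physical run with
 * ocfs2_extent_map_get_blocks() and reading it via
 * ocfs2_read_blocks(). Hitting a hole is treated as an error (-EIO).
 */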
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
                           struct buffer_head *bhs[], int flags,
                           int (*validate)(struct super_block *sb,
                                           struct buffer_head *bh))
{
        int rc = 0;
        u64 p_block, p_count;
        int i, count, done = 0;

        trace_ocfs2_read_virt_blocks(
             inode, (unsigned long long)v_block, nr, bhs, flags,
             validate);

        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
            i_size_read(inode)) {
                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
                goto out;
        }

        while (done < nr) {
                down_read(&OCFS2_I(inode)->ip_alloc_sem);
                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
                                                 &p_block, &p_count, NULL);
                up_read(&OCFS2_I(inode)->ip_alloc_sem);
                if (rc) {
                        mlog_errno(rc);
                        break;
                }

                if (!p_block) {
                        rc = -EIO;
                        mlog(ML_ERROR,
                             "Inode #%llu contains a hole at offset %llu\n",
                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
                             (unsigned long long)(v_block + done) <<
                             inode->i_sb->s_blocksize_bits);
                        break;
                }

                count = nr - done;
                if (p_count < count)
                        count = p_count;

                /*
                 * If the caller passed us bhs, they should have come
                 * from a previous readahead call to this function.  Thus,
                 * they should have the right b_blocknr.
                 */
                for (i = 0; i < count; i++) {
                        if (!bhs[done + i])
                                continue;
                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
                }

                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
                                       bhs + done, flags, validate);
                if (rc) {
                        mlog_errno(rc);
                        break;
                }
                done += count;
        }

out:
        return rc;
}