linux/fs/ocfs2/extent_map.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * extent_map.c
   4 *
   5 * Block/Cluster mapping functions
   6 *
   7 * Copyright (C) 2004 Oracle.  All rights reserved.
   8 */
   9
  10#include <linux/fs.h>
  11#include <linux/init.h>
  12#include <linux/slab.h>
  13#include <linux/types.h>
  14#include <linux/fiemap.h>
  15
  16#include <cluster/masklog.h>
  17
  18#include "ocfs2.h"
  19
  20#include "alloc.h"
  21#include "dlmglue.h"
  22#include "extent_map.h"
  23#include "inode.h"
  24#include "super.h"
  25#include "symlink.h"
  26#include "aops.h"
  27#include "ocfs2_trace.h"
  28
  29#include "buffer_head_io.h"
  30
  31/*
  32 * The extent caching implementation is intentionally trivial.
  33 *
  34 * We only cache a small number of extents stored directly on the
  35 * inode, so linear order operations are acceptable. If we ever want
  36 * to increase the size of the extent map, then these algorithms must
  37 * get smarter.
  38 */
  39
  40void ocfs2_extent_map_init(struct inode *inode)
  41{
  42        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  43
  44        oi->ip_extent_map.em_num_items = 0;
  45        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  46}
  47
  48static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  49                                      unsigned int cpos,
  50                                      struct ocfs2_extent_map_item **ret_emi)
  51{
  52        unsigned int range;
  53        struct ocfs2_extent_map_item *emi;
  54
  55        *ret_emi = NULL;
  56
  57        list_for_each_entry(emi, &em->em_list, ei_list) {
  58                range = emi->ei_cpos + emi->ei_clusters;
  59
  60                if (cpos >= emi->ei_cpos && cpos < range) {
  61                        list_move(&emi->ei_list, &em->em_list);
  62
  63                        *ret_emi = emi;
  64                        break;
  65                }
  66        }
  67}
  68
  69static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  70                                   unsigned int *phys, unsigned int *len,
  71                                   unsigned int *flags)
  72{
  73        unsigned int coff;
  74        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  75        struct ocfs2_extent_map_item *emi;
  76
  77        spin_lock(&oi->ip_lock);
  78
  79        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  80        if (emi) {
  81                coff = cpos - emi->ei_cpos;
  82                *phys = emi->ei_phys + coff;
  83                if (len)
  84                        *len = emi->ei_clusters - coff;
  85                if (flags)
  86                        *flags = emi->ei_flags;
  87        }
  88
  89        spin_unlock(&oi->ip_lock);
  90
  91        if (emi == NULL)
  92                return -ENOENT;
  93
  94        return 0;
  95}
  96
  97/*
  98 * Forget about all clusters equal to or greater than cpos.
  99 */
 100void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 101{
 102        struct ocfs2_extent_map_item *emi, *n;
 103        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 104        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 105        LIST_HEAD(tmp_list);
 106        unsigned int range;
 107
 108        spin_lock(&oi->ip_lock);
 109        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 110                if (emi->ei_cpos >= cpos) {
 111                        /* Full truncate of this record. */
 112                        list_move(&emi->ei_list, &tmp_list);
 113                        BUG_ON(em->em_num_items == 0);
 114                        em->em_num_items--;
 115                        continue;
 116                }
 117
 118                range = emi->ei_cpos + emi->ei_clusters;
 119                if (range > cpos) {
 120                        /* Partial truncate */
 121                        emi->ei_clusters = cpos - emi->ei_cpos;
 122                }
 123        }
 124        spin_unlock(&oi->ip_lock);
 125
 126        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 127                list_del(&emi->ei_list);
 128                kfree(emi);
 129        }
 130}
 131
 132/*
 133 * Is any part of emi2 contained within emi1
 134 */
 135static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 136                                 struct ocfs2_extent_map_item *emi2)
 137{
 138        unsigned int range1, range2;
 139
 140        /*
 141         * Check if logical start of emi2 is inside emi1
 142         */
 143        range1 = emi1->ei_cpos + emi1->ei_clusters;
 144        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 145                return 1;
 146
 147        /*
 148         * Check if logical end of emi2 is inside emi1
 149         */
 150        range2 = emi2->ei_cpos + emi2->ei_clusters;
 151        if (range2 > emi1->ei_cpos && range2 <= range1)
 152                return 1;
 153
 154        return 0;
 155}
 156
 157static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 158                                  struct ocfs2_extent_map_item *src)
 159{
 160        dest->ei_cpos = src->ei_cpos;
 161        dest->ei_phys = src->ei_phys;
 162        dest->ei_clusters = src->ei_clusters;
 163        dest->ei_flags = src->ei_flags;
 164}
 165
 166/*
 167 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 168 * otherwise.
 169 */
 170static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 171                                         struct ocfs2_extent_map_item *ins)
 172{
 173        /*
 174         * Handle contiguousness
 175         */
 176        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 177            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 178            ins->ei_flags == emi->ei_flags) {
 179                emi->ei_clusters += ins->ei_clusters;
 180                return 1;
 181        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 182                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 183                   ins->ei_flags == emi->ei_flags) {
 184                emi->ei_phys = ins->ei_phys;
 185                emi->ei_cpos = ins->ei_cpos;
 186                emi->ei_clusters += ins->ei_clusters;
 187                return 1;
 188        }
 189
 190        /*
 191         * Overlapping extents - this shouldn't happen unless we've
 192         * split an extent to change it's flags. That is exceedingly
 193         * rare, so there's no sense in trying to optimize it yet.
 194         */
 195        if (ocfs2_ei_is_contained(emi, ins) ||
 196            ocfs2_ei_is_contained(ins, emi)) {
 197                ocfs2_copy_emi_fields(emi, ins);
 198                return 1;
 199        }
 200
 201        /* No merge was possible. */
 202        return 0;
 203}
 204
 205/*
 206 * In order to reduce complexity on the caller, this insert function
 207 * is intentionally liberal in what it will accept.
 208 *
 209 * The only rule is that the truncate call *must* be used whenever
 210 * records have been deleted. This avoids inserting overlapping
 211 * records with different physical mappings.
 212 */
 213void ocfs2_extent_map_insert_rec(struct inode *inode,
 214                                 struct ocfs2_extent_rec *rec)
 215{
 216        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 217        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 218        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
 219        struct ocfs2_extent_map_item ins;
 220
 221        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
 222        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
 223                                               le64_to_cpu(rec->e_blkno));
 224        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
 225        ins.ei_flags = rec->e_flags;
 226
 227search:
 228        spin_lock(&oi->ip_lock);
 229
 230        list_for_each_entry(emi, &em->em_list, ei_list) {
 231                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
 232                        list_move(&emi->ei_list, &em->em_list);
 233                        spin_unlock(&oi->ip_lock);
 234                        goto out;
 235                }
 236        }
 237
 238        /*
 239         * No item could be merged.
 240         *
 241         * Either allocate and add a new item, or overwrite the last recently
 242         * inserted.
 243         */
 244
 245        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
 246                if (new_emi == NULL) {
 247                        spin_unlock(&oi->ip_lock);
 248
 249                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
 250                        if (new_emi == NULL)
 251                                goto out;
 252
 253                        goto search;
 254                }
 255
 256                ocfs2_copy_emi_fields(new_emi, &ins);
 257                list_add(&new_emi->ei_list, &em->em_list);
 258                em->em_num_items++;
 259                new_emi = NULL;
 260        } else {
 261                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
 262                emi = list_entry(em->em_list.prev,
 263                                 struct ocfs2_extent_map_item, ei_list);
 264                list_move(&emi->ei_list, &em->em_list);
 265                ocfs2_copy_emi_fields(emi, &ins);
 266        }
 267
 268        spin_unlock(&oi->ip_lock);
 269
 270out:
 271        kfree(new_emi);
 272}
 273
 274static int ocfs2_last_eb_is_empty(struct inode *inode,
 275                                  struct ocfs2_dinode *di)
 276{
 277        int ret, next_free;
 278        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
 279        struct buffer_head *eb_bh = NULL;
 280        struct ocfs2_extent_block *eb;
 281        struct ocfs2_extent_list *el;
 282
 283        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 284        if (ret) {
 285                mlog_errno(ret);
 286                goto out;
 287        }
 288
 289        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 290        el = &eb->h_list;
 291
 292        if (el->l_tree_depth) {
 293                ocfs2_error(inode->i_sb,
 294                            "Inode %lu has non zero tree depth in leaf block %llu\n",
 295                            inode->i_ino,
 296                            (unsigned long long)eb_bh->b_blocknr);
 297                ret = -EROFS;
 298                goto out;
 299        }
 300
 301        next_free = le16_to_cpu(el->l_next_free_rec);
 302
 303        if (next_free == 0 ||
 304            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
 305                ret = 1;
 306
 307out:
 308        brelse(eb_bh);
 309        return ret;
 310}
 311
 312/*
 313 * Return the 1st index within el which contains an extent start
 314 * larger than v_cluster.
 315 */
 316static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 317                                       u32 v_cluster)
 318{
 319        int i;
 320        struct ocfs2_extent_rec *rec;
 321
 322        for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 323                rec = &el->l_recs[i];
 324
 325                if (v_cluster < le32_to_cpu(rec->e_cpos))
 326                        break;
 327        }
 328
 329        return i;
 330}
 331
 332/*
 333 * Figure out the size of a hole which starts at v_cluster within the given
 334 * extent list.
 335 *
 336 * If there is no more allocation past v_cluster, we return the maximum
 337 * cluster size minus v_cluster.
 338 *
 339 * If we have in-inode extents, then el points to the dinode list and
 340 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 341 * containing el.
 342 */
 343int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
 344                               struct ocfs2_extent_list *el,
 345                               struct buffer_head *eb_bh,
 346                               u32 v_cluster,
 347                               u32 *num_clusters)
 348{
 349        int ret, i;
 350        struct buffer_head *next_eb_bh = NULL;
 351        struct ocfs2_extent_block *eb, *next_eb;
 352
 353        i = ocfs2_search_for_hole_index(el, v_cluster);
 354
 355        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
 356                eb = (struct ocfs2_extent_block *)eb_bh->b_data;
 357
 358                /*
 359                 * Check the next leaf for any extents.
 360                 */
 361
 362                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 363                        goto no_more_extents;
 364
 365                ret = ocfs2_read_extent_block(ci,
 366                                              le64_to_cpu(eb->h_next_leaf_blk),
 367                                              &next_eb_bh);
 368                if (ret) {
 369                        mlog_errno(ret);
 370                        goto out;
 371                }
 372
 373                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 374                el = &next_eb->h_list;
 375                i = ocfs2_search_for_hole_index(el, v_cluster);
 376        }
 377
 378no_more_extents:
 379        if (i == le16_to_cpu(el->l_next_free_rec)) {
 380                /*
 381                 * We're at the end of our existing allocation. Just
 382                 * return the maximum number of clusters we could
 383                 * possibly allocate.
 384                 */
 385                *num_clusters = UINT_MAX - v_cluster;
 386        } else {
 387                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
 388        }
 389
 390        ret = 0;
 391out:
 392        brelse(next_eb_bh);
 393        return ret;
 394}
 395
 396static int ocfs2_get_clusters_nocache(struct inode *inode,
 397                                      struct buffer_head *di_bh,
 398                                      u32 v_cluster, unsigned int *hole_len,
 399                                      struct ocfs2_extent_rec *ret_rec,
 400                                      unsigned int *is_last)
 401{
 402        int i, ret, tree_height, len;
 403        struct ocfs2_dinode *di;
 404        struct ocfs2_extent_block *eb;
 405        struct ocfs2_extent_list *el;
 406        struct ocfs2_extent_rec *rec;
 407        struct buffer_head *eb_bh = NULL;
 408
 409        memset(ret_rec, 0, sizeof(*ret_rec));
 410        if (is_last)
 411                *is_last = 0;
 412
 413        di = (struct ocfs2_dinode *) di_bh->b_data;
 414        el = &di->id2.i_list;
 415        tree_height = le16_to_cpu(el->l_tree_depth);
 416
 417        if (tree_height > 0) {
 418                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 419                                      &eb_bh);
 420                if (ret) {
 421                        mlog_errno(ret);
 422                        goto out;
 423                }
 424
 425                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 426                el = &eb->h_list;
 427
 428                if (el->l_tree_depth) {
 429                        ocfs2_error(inode->i_sb,
 430                                    "Inode %lu has non zero tree depth in leaf block %llu\n",
 431                                    inode->i_ino,
 432                                    (unsigned long long)eb_bh->b_blocknr);
 433                        ret = -EROFS;
 434                        goto out;
 435                }
 436        }
 437
 438        i = ocfs2_search_extent_list(el, v_cluster);
 439        if (i == -1) {
 440                /*
 441                 * Holes can be larger than the maximum size of an
 442                 * extent, so we return their lengths in a separate
 443                 * field.
 444                 */
 445                if (hole_len) {
 446                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
 447                                                         el, eb_bh,
 448                                                         v_cluster, &len);
 449                        if (ret) {
 450                                mlog_errno(ret);
 451                                goto out;
 452                        }
 453
 454                        *hole_len = len;
 455                }
 456                goto out_hole;
 457        }
 458
 459        rec = &el->l_recs[i];
 460
 461        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 462
 463        if (!rec->e_blkno) {
 464                ocfs2_error(inode->i_sb,
 465                            "Inode %lu has bad extent record (%u, %u, 0)\n",
 466                            inode->i_ino,
 467                            le32_to_cpu(rec->e_cpos),
 468                            ocfs2_rec_clusters(el, rec));
 469                ret = -EROFS;
 470                goto out;
 471        }
 472
 473        *ret_rec = *rec;
 474
 475        /*
 476         * Checking for last extent is potentially expensive - we
 477         * might have to look at the next leaf over to see if it's
 478         * empty.
 479         *
 480         * The first two checks are to see whether the caller even
 481         * cares for this information, and if the extent is at least
 482         * the last in it's list.
 483         *
 484         * If those hold true, then the extent is last if any of the
 485         * additional conditions hold true:
 486         *  - Extent list is in-inode
 487         *  - Extent list is right-most
 488         *  - Extent list is 2nd to rightmost, with empty right-most
 489         */
 490        if (is_last) {
 491                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
 492                        if (tree_height == 0)
 493                                *is_last = 1;
 494                        else if (eb->h_blkno == di->i_last_eb_blk)
 495                                *is_last = 1;
 496                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
 497                                ret = ocfs2_last_eb_is_empty(inode, di);
 498                                if (ret < 0) {
 499                                        mlog_errno(ret);
 500                                        goto out;
 501                                }
 502                                if (ret == 1)
 503                                        *is_last = 1;
 504                        }
 505                }
 506        }
 507
 508out_hole:
 509        ret = 0;
 510out:
 511        brelse(eb_bh);
 512        return ret;
 513}
 514
 515static void ocfs2_relative_extent_offsets(struct super_block *sb,
 516                                          u32 v_cluster,
 517                                          struct ocfs2_extent_rec *rec,
 518                                          u32 *p_cluster, u32 *num_clusters)
 519
 520{
 521        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 522
 523        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 524        *p_cluster = *p_cluster + coff;
 525
 526        if (num_clusters)
 527                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 528}
 529
 530int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 531                             u32 *p_cluster, u32 *num_clusters,
 532                             struct ocfs2_extent_list *el,
 533                             unsigned int *extent_flags)
 534{
 535        int ret = 0, i;
 536        struct buffer_head *eb_bh = NULL;
 537        struct ocfs2_extent_block *eb;
 538        struct ocfs2_extent_rec *rec;
 539        u32 coff;
 540
 541        if (el->l_tree_depth) {
 542                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 543                                      &eb_bh);
 544                if (ret) {
 545                        mlog_errno(ret);
 546                        goto out;
 547                }
 548
 549                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 550                el = &eb->h_list;
 551
 552                if (el->l_tree_depth) {
 553                        ocfs2_error(inode->i_sb,
 554                                    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
 555                                    inode->i_ino,
 556                                    (unsigned long long)eb_bh->b_blocknr);
 557                        ret = -EROFS;
 558                        goto out;
 559                }
 560        }
 561
 562        i = ocfs2_search_extent_list(el, v_cluster);
 563        if (i == -1) {
 564                ret = -EROFS;
 565                mlog_errno(ret);
 566                goto out;
 567        } else {
 568                rec = &el->l_recs[i];
 569                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 570
 571                if (!rec->e_blkno) {
 572                        ocfs2_error(inode->i_sb,
 573                                    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
 574                                    inode->i_ino,
 575                                    le32_to_cpu(rec->e_cpos),
 576                                    ocfs2_rec_clusters(el, rec));
 577                        ret = -EROFS;
 578                        goto out;
 579                }
 580                coff = v_cluster - le32_to_cpu(rec->e_cpos);
 581                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 582                                                    le64_to_cpu(rec->e_blkno));
 583                *p_cluster = *p_cluster + coff;
 584                if (num_clusters)
 585                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 586
 587                if (extent_flags)
 588                        *extent_flags = rec->e_flags;
 589        }
 590out:
 591        brelse(eb_bh);
 592        return ret;
 593}
 594
 595int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 596                       u32 *p_cluster, u32 *num_clusters,
 597                       unsigned int *extent_flags)
 598{
 599        int ret;
 600        unsigned int hole_len, flags = 0;
 601        struct buffer_head *di_bh = NULL;
 602        struct ocfs2_extent_rec rec;
 603
 604        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 605                ret = -ERANGE;
 606                mlog_errno(ret);
 607                goto out;
 608        }
 609
 610        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
 611                                      num_clusters, extent_flags);
 612        if (ret == 0)
 613                goto out;
 614
 615        ret = ocfs2_read_inode_block(inode, &di_bh);
 616        if (ret) {
 617                mlog_errno(ret);
 618                goto out;
 619        }
 620
 621        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
 622                                         &rec, NULL);
 623        if (ret) {
 624                mlog_errno(ret);
 625                goto out;
 626        }
 627
 628        if (rec.e_blkno == 0ULL) {
 629                /*
 630                 * A hole was found. Return some canned values that
 631                 * callers can key on. If asked for, num_clusters will
 632                 * be populated with the size of the hole.
 633                 */
 634                *p_cluster = 0;
 635                if (num_clusters) {
 636                        *num_clusters = hole_len;
 637                }
 638        } else {
 639                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
 640                                              p_cluster, num_clusters);
 641                flags = rec.e_flags;
 642
 643                ocfs2_extent_map_insert_rec(inode, &rec);
 644        }
 645
 646        if (extent_flags)
 647                *extent_flags = flags;
 648
 649out:
 650        brelse(di_bh);
 651        return ret;
 652}
 653
 654/*
 655 * This expects alloc_sem to be held. The allocation cannot change at
 656 * all while the map is in the process of being updated.
 657 */
 658int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 659                                u64 *ret_count, unsigned int *extent_flags)
 660{
 661        int ret;
 662        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 663        u32 cpos, num_clusters, p_cluster;
 664        u64 boff = 0;
 665
 666        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 667
 668        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 669                                 extent_flags);
 670        if (ret) {
 671                mlog_errno(ret);
 672                goto out;
 673        }
 674
 675        /*
 676         * p_cluster == 0 indicates a hole.
 677         */
 678        if (p_cluster) {
 679                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 680                boff += (v_blkno & (u64)(bpc - 1));
 681        }
 682
 683        *p_blkno = boff;
 684
 685        if (ret_count) {
 686                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 687                *ret_count -= v_blkno & (u64)(bpc - 1);
 688        }
 689
 690out:
 691        return ret;
 692}
 693
 694/*
 695 * The ocfs2_fiemap_inline() may be a little bit misleading, since
 696 * it not only handles the fiemap for inlined files, but also deals
 697 * with the fast symlink, cause they have no difference for extent
 698 * mapping per se.
 699 */
 700static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 701                               struct fiemap_extent_info *fieinfo,
 702                               u64 map_start)
 703{
 704        int ret;
 705        unsigned int id_count;
 706        struct ocfs2_dinode *di;
 707        u64 phys;
 708        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
 709        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 710
 711        di = (struct ocfs2_dinode *)di_bh->b_data;
 712        if (ocfs2_inode_is_fast_symlink(inode))
 713                id_count = ocfs2_fast_symlink_chars(inode->i_sb);
 714        else
 715                id_count = le16_to_cpu(di->id2.i_data.id_count);
 716
 717        if (map_start < id_count) {
 718                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
 719                if (ocfs2_inode_is_fast_symlink(inode))
 720                        phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
 721                else
 722                        phys += offsetof(struct ocfs2_dinode,
 723                                         id2.i_data.id_data);
 724
 725                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
 726                                              flags);
 727                if (ret < 0)
 728                        return ret;
 729        }
 730
 731        return 0;
 732}
 733
 734int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 735                 u64 map_start, u64 map_len)
 736{
 737        int ret, is_last;
 738        u32 mapping_end, cpos;
 739        unsigned int hole_size;
 740        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 741        u64 len_bytes, phys_bytes, virt_bytes;
 742        struct buffer_head *di_bh = NULL;
 743        struct ocfs2_extent_rec rec;
 744
 745        ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
 746        if (ret)
 747                return ret;
 748
 749        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 750        if (ret) {
 751                mlog_errno(ret);
 752                goto out;
 753        }
 754
 755        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 756
 757        /*
 758         * Handle inline-data and fast symlink separately.
 759         */
 760        if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
 761            ocfs2_inode_is_fast_symlink(inode)) {
 762                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
 763                goto out_unlock;
 764        }
 765
 766        cpos = map_start >> osb->s_clustersize_bits;
 767        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 768                                               map_start + map_len);
 769        is_last = 0;
 770        while (cpos < mapping_end && !is_last) {
 771                u32 fe_flags;
 772
 773                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 774                                                 &hole_size, &rec, &is_last);
 775                if (ret) {
 776                        mlog_errno(ret);
 777                        goto out_unlock;
 778                }
 779
 780                if (rec.e_blkno == 0ULL) {
 781                        cpos += hole_size;
 782                        continue;
 783                }
 784
 785                fe_flags = 0;
 786                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
 787                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
 788                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 789                        fe_flags |= FIEMAP_EXTENT_SHARED;
 790                if (is_last)
 791                        fe_flags |= FIEMAP_EXTENT_LAST;
 792                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
 793                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
 794                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
 795
 796                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
 797                                              len_bytes, fe_flags);
 798                if (ret)
 799                        break;
 800
 801                cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
 802        }
 803
 804        if (ret > 0)
 805                ret = 0;
 806
 807out_unlock:
 808        brelse(di_bh);
 809
 810        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 811
 812        ocfs2_inode_unlock(inode, 0);
 813out:
 814
 815        return ret;
 816}
 817
 818/* Is IO overwriting allocated blocks? */
 819int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
 820                       u64 map_start, u64 map_len)
 821{
 822        int ret = 0, is_last;
 823        u32 mapping_end, cpos;
 824        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 825        struct ocfs2_extent_rec rec;
 826
 827        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 828                if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
 829                        return ret;
 830                else
 831                        return -EAGAIN;
 832        }
 833
 834        cpos = map_start >> osb->s_clustersize_bits;
 835        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 836                                               map_start + map_len);
 837        is_last = 0;
 838        while (cpos < mapping_end && !is_last) {
 839                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 840                                                 NULL, &rec, &is_last);
 841                if (ret) {
 842                        mlog_errno(ret);
 843                        goto out;
 844                }
 845
 846                if (rec.e_blkno == 0ULL)
 847                        break;
 848
 849                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 850                        break;
 851
 852                cpos = le32_to_cpu(rec.e_cpos) +
 853                        le16_to_cpu(rec.e_leaf_clusters);
 854        }
 855
 856        if (cpos < mapping_end)
 857                ret = -EAGAIN;
 858out:
 859        return ret;
 860}
 861
 862int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 863{
 864        struct inode *inode = file->f_mapping->host;
 865        int ret;
 866        unsigned int is_last = 0, is_data = 0;
 867        u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
 868        u32 cpos, cend, clen, hole_size;
 869        u64 extoff, extlen;
 870        struct buffer_head *di_bh = NULL;
 871        struct ocfs2_extent_rec rec;
 872
 873        BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 874
 875        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 876        if (ret) {
 877                mlog_errno(ret);
 878                goto out;
 879        }
 880
 881        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 882
 883        if (*offset >= i_size_read(inode)) {
 884                ret = -ENXIO;
 885                goto out_unlock;
 886        }
 887
 888        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 889                if (whence == SEEK_HOLE)
 890                        *offset = i_size_read(inode);
 891                goto out_unlock;
 892        }
 893
 894        clen = 0;
 895        cpos = *offset >> cs_bits;
 896        cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 897
 898        while (cpos < cend && !is_last) {
 899                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
 900                                                 &rec, &is_last);
 901                if (ret) {
 902                        mlog_errno(ret);
 903                        goto out_unlock;
 904                }
 905
 906                extoff = cpos;
 907                extoff <<= cs_bits;
 908
 909                if (rec.e_blkno == 0ULL) {
 910                        clen = hole_size;
 911                        is_data = 0;
 912                } else {
 913                        clen = le16_to_cpu(rec.e_leaf_clusters) -
 914                                (cpos - le32_to_cpu(rec.e_cpos));
 915                        is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 916                }
 917
 918                if ((!is_data && whence == SEEK_HOLE) ||
 919                    (is_data && whence == SEEK_DATA)) {
 920                        if (extoff > *offset)
 921                                *offset = extoff;
 922                        goto out_unlock;
 923                }
 924
 925                if (!is_last)
 926                        cpos += clen;
 927        }
 928
 929        if (whence == SEEK_HOLE) {
 930                extoff = cpos;
 931                extoff <<= cs_bits;
 932                extlen = clen;
 933                extlen <<=  cs_bits;
 934
 935                if ((extoff + extlen) > i_size_read(inode))
 936                        extlen = i_size_read(inode) - extoff;
 937                extoff += extlen;
 938                if (extoff > *offset)
 939                        *offset = extoff;
 940                goto out_unlock;
 941        }
 942
 943        ret = -ENXIO;
 944
 945out_unlock:
 946
 947        brelse(di_bh);
 948
 949        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 950
 951        ocfs2_inode_unlock(inode, 0);
 952out:
 953        return ret;
 954}
 955
 956int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 957                           struct buffer_head *bhs[], int flags,
 958                           int (*validate)(struct super_block *sb,
 959                                           struct buffer_head *bh))
 960{
 961        int rc = 0;
 962        u64 p_block, p_count;
 963        int i, count, done = 0;
 964
 965        trace_ocfs2_read_virt_blocks(
 966             inode, (unsigned long long)v_block, nr, bhs, flags,
 967             validate);
 968
 969        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 970            i_size_read(inode)) {
 971                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 972                goto out;
 973        }
 974
 975        while (done < nr) {
 976                down_read(&OCFS2_I(inode)->ip_alloc_sem);
 977                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 978                                                 &p_block, &p_count, NULL);
 979                up_read(&OCFS2_I(inode)->ip_alloc_sem);
 980                if (rc) {
 981                        mlog_errno(rc);
 982                        break;
 983                }
 984
 985                if (!p_block) {
 986                        rc = -EIO;
 987                        mlog(ML_ERROR,
 988                             "Inode #%llu contains a hole at offset %llu\n",
 989                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
 990                             (unsigned long long)(v_block + done) <<
 991                             inode->i_sb->s_blocksize_bits);
 992                        break;
 993                }
 994
 995                count = nr - done;
 996                if (p_count < count)
 997                        count = p_count;
 998
 999                /*
1000                 * If the caller passed us bhs, they should have come
1001                 * from a previous readahead call to this function.  Thus,
1002                 * they should have the right b_blocknr.
1003                 */
1004                for (i = 0; i < count; i++) {
1005                        if (!bhs[done + i])
1006                                continue;
1007                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
1008                }
1009
1010                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
1011                                       bhs + done, flags, validate);
1012                if (rc) {
1013                        mlog_errno(rc);
1014                        break;
1015                }
1016                done += count;
1017        }
1018
1019out:
1020        return rc;
1021}
1022
1023
1024