linux/fs/ocfs2/extent_map.c
// SPDX-License-Identifier: GPL-2.0-only
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */

void ocfs2_extent_map_init(struct inode *inode)
{
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        oi->ip_extent_map.em_num_items = 0;
        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

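/*
 * Look up cpos in em's item list.  A hit is moved to the head of the
 * list with list_move(), keeping the list in most-recently-used
 * order; the tail is therefore the least recently used item and the
 * natural victim when the map is full.
 */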
static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
                                      unsigned int cpos,
                                      struct ocfs2_extent_map_item **ret_emi)
{
        unsigned int range;
        struct ocfs2_extent_map_item *emi;

        *ret_emi = NULL;

        list_for_each_entry(emi, &em->em_list, ei_list) {
                range = emi->ei_cpos + emi->ei_clusters;

                if (cpos >= emi->ei_cpos && cpos < range) {
                        list_move(&emi->ei_list, &em->em_list);

                        *ret_emi = emi;
                        break;
                }
        }
}

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
                                   unsigned int *phys, unsigned int *len,
                                   unsigned int *flags)
{
        unsigned int coff;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_extent_map_item *emi;

        spin_lock(&oi->ip_lock);

        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
        if (emi) {
                coff = cpos - emi->ei_cpos;
                *phys = emi->ei_phys + coff;
                if (len)
                        *len = emi->ei_clusters - coff;
                if (flags)
                        *flags = emi->ei_flags;
        }

        spin_unlock(&oi->ip_lock);

        if (emi == NULL)
                return -ENOENT;

        return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
        struct ocfs2_extent_map_item *emi, *n;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_extent_map *em = &oi->ip_extent_map;
        LIST_HEAD(tmp_list);
        unsigned int range;

        spin_lock(&oi->ip_lock);
        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
                if (emi->ei_cpos >= cpos) {
                        /* Full truncate of this record. */
                        list_move(&emi->ei_list, &tmp_list);
                        BUG_ON(em->em_num_items == 0);
                        em->em_num_items--;
                        continue;
                }

                range = emi->ei_cpos + emi->ei_clusters;
                if (range > cpos) {
                        /* Partial truncate */
                        emi->ei_clusters = cpos - emi->ei_cpos;
                }
        }
        spin_unlock(&oi->ip_lock);

        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
                list_del(&emi->ei_list);
                kfree(emi);
        }
}

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
                                 struct ocfs2_extent_map_item *emi2)
{
        unsigned int range1, range2;

        /*
         * Check if the logical start of emi2 is inside emi1
         */
        range1 = emi1->ei_cpos + emi1->ei_clusters;
        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
                return 1;

        /*
         * Check if the logical end of emi2 is inside emi1
         */
        range2 = emi2->ei_cpos + emi2->ei_clusters;
        if (range2 > emi1->ei_cpos && range2 <= range1)
                return 1;

        return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
                                  struct ocfs2_extent_map_item *src)
{
        dest->ei_cpos = src->ei_cpos;
        dest->ei_phys = src->ei_phys;
        dest->ei_clusters = src->ei_clusters;
        dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge ins into emi.  A merge happens when ins is physically
 * and logically contiguous with emi on either side (with matching
 * flags), or when the two overlap.  Returns 1 if the merge succeeds,
 * zero otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
                                         struct ocfs2_extent_map_item *ins)
{
        /*
         * Handle contiguousness
         */
        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
            ins->ei_flags == emi->ei_flags) {
                emi->ei_clusters += ins->ei_clusters;
                return 1;
        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
                   ins->ei_flags == emi->ei_flags) {
                emi->ei_phys = ins->ei_phys;
                emi->ei_cpos = ins->ei_cpos;
                emi->ei_clusters += ins->ei_clusters;
                return 1;
        }

        /*
         * Overlapping extents - this shouldn't happen unless we've
         * split an extent to change its flags. That is exceedingly
         * rare, so there's no sense in trying to optimize it yet.
         */
        if (ocfs2_ei_is_contained(emi, ins) ||
            ocfs2_ei_is_contained(ins, emi)) {
                ocfs2_copy_emi_fields(emi, ins);
                return 1;
        }

        /* No merge was possible. */
        return 0;
}

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
                                 struct ocfs2_extent_rec *rec)
{
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_extent_map *em = &oi->ip_extent_map;
        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
        struct ocfs2_extent_map_item ins;

        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
                                               le64_to_cpu(rec->e_blkno));
        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
        ins.ei_flags = rec->e_flags;

search:
        spin_lock(&oi->ip_lock);

        list_for_each_entry(emi, &em->em_list, ei_list) {
                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
                        list_move(&emi->ei_list, &em->em_list);
                        spin_unlock(&oi->ip_lock);
                        goto out;
                }
        }

        /*
         * No item could be merged.
         *
         * Either allocate and add a new item, or overwrite the least
         * recently inserted.
         */

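        /*
         * We cannot allocate while holding ip_lock, so drop it,
         * allocate, and retry the search from scratch -- the list may
         * have changed while the lock was released.
         */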
        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
                if (new_emi == NULL) {
                        spin_unlock(&oi->ip_lock);

                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
                        if (new_emi == NULL)
                                goto out;

                        goto search;
                }

                ocfs2_copy_emi_fields(new_emi, &ins);
                list_add(&new_emi->ei_list, &em->em_list);
                em->em_num_items++;
                new_emi = NULL;
        } else {
                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
                emi = list_entry(em->em_list.prev,
                                 struct ocfs2_extent_map_item, ei_list);
                list_move(&emi->ei_list, &em->em_list);
                ocfs2_copy_emi_fields(emi, &ins);
        }

        spin_unlock(&oi->ip_lock);

out:
        kfree(new_emi);
}

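/*
 * Returns 1 if the right-most leaf block holds no live extents, 0 if
 * it does, or a negative errno if reading the block fails.
 */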
static int ocfs2_last_eb_is_empty(struct inode *inode,
                                  struct ocfs2_dinode *di)
{
        int ret, next_free;
        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
        struct buffer_head *eb_bh = NULL;
        struct ocfs2_extent_block *eb;
        struct ocfs2_extent_list *el;

        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
        el = &eb->h_list;

        if (el->l_tree_depth) {
                ocfs2_error(inode->i_sb,
                            "Inode %lu has non zero tree depth in leaf block %llu\n",
                            inode->i_ino,
                            (unsigned long long)eb_bh->b_blocknr);
                ret = -EROFS;
                goto out;
        }

        next_free = le16_to_cpu(el->l_next_free_rec);

        if (next_free == 0 ||
            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
                ret = 1;

out:
        brelse(eb_bh);
        return ret;
}

/*
 * Return the 1st index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
                                       u32 v_cluster)
{
        int i;
        struct ocfs2_extent_rec *rec;

        for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
                rec = &el->l_recs[i];

                if (v_cluster < le32_to_cpu(rec->e_cpos))
                        break;
        }

        return i;
}

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the largest
 * hole we can: UINT_MAX minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
                               struct ocfs2_extent_list *el,
                               struct buffer_head *eb_bh,
                               u32 v_cluster,
                               u32 *num_clusters)
{
        int ret, i;
        struct buffer_head *next_eb_bh = NULL;
        struct ocfs2_extent_block *eb, *next_eb;

        i = ocfs2_search_for_hole_index(el, v_cluster);

        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
                eb = (struct ocfs2_extent_block *)eb_bh->b_data;

                /*
                 * Check the next leaf for any extents.
                 */

                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
                        goto no_more_extents;

                ret = ocfs2_read_extent_block(ci,
                                              le64_to_cpu(eb->h_next_leaf_blk),
                                              &next_eb_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
                el = &next_eb->h_list;
                i = ocfs2_search_for_hole_index(el, v_cluster);
        }

no_more_extents:
        if (i == le16_to_cpu(el->l_next_free_rec)) {
                /*
                 * We're at the end of our existing allocation. Just
                 * return the maximum number of clusters we could
                 * possibly allocate.
                 */
                *num_clusters = UINT_MAX - v_cluster;
        } else {
                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
        }

        ret = 0;
out:
        brelse(next_eb_bh);
        return ret;
}

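/*
 * Look v_cluster up directly in the on-disk extent tree, bypassing
 * the in-memory extent map.  On success, *ret_rec holds the raw
 * extent record (all zeroes for a hole), *hole_len the hole length
 * when one was found, and *is_last whether the record is the file's
 * last extent.
 */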
static int ocfs2_get_clusters_nocache(struct inode *inode,
                                      struct buffer_head *di_bh,
                                      u32 v_cluster, unsigned int *hole_len,
                                      struct ocfs2_extent_rec *ret_rec,
                                      unsigned int *is_last)
{
        int i, ret, tree_height, len;
        struct ocfs2_dinode *di;
        struct ocfs2_extent_block *eb;
        struct ocfs2_extent_list *el;
        struct ocfs2_extent_rec *rec;
        struct buffer_head *eb_bh = NULL;

        memset(ret_rec, 0, sizeof(*ret_rec));
        if (is_last)
                *is_last = 0;

        di = (struct ocfs2_dinode *) di_bh->b_data;
        el = &di->id2.i_list;
        tree_height = le16_to_cpu(el->l_tree_depth);

        if (tree_height > 0) {
                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
                                      &eb_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
                el = &eb->h_list;

                if (el->l_tree_depth) {
                        ocfs2_error(inode->i_sb,
                                    "Inode %lu has non zero tree depth in leaf block %llu\n",
                                    inode->i_ino,
                                    (unsigned long long)eb_bh->b_blocknr);
                        ret = -EROFS;
                        goto out;
                }
        }

        i = ocfs2_search_extent_list(el, v_cluster);
        if (i == -1) {
                /*
                 * Holes can be larger than the maximum size of an
                 * extent, so we return their lengths in a separate
                 * field.
                 */
                if (hole_len) {
                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
                                                         el, eb_bh,
                                                         v_cluster, &len);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        *hole_len = len;
                }
                goto out_hole;
        }

        rec = &el->l_recs[i];

        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

        if (!rec->e_blkno) {
                ocfs2_error(inode->i_sb,
                            "Inode %lu has bad extent record (%u, %u, 0)\n",
                            inode->i_ino,
                            le32_to_cpu(rec->e_cpos),
                            ocfs2_rec_clusters(el, rec));
                ret = -EROFS;
                goto out;
        }

        *ret_rec = *rec;

        /*
         * Checking for last extent is potentially expensive - we
         * might have to look at the next leaf over to see if it's
         * empty.
         *
         * The first two checks are to see whether the caller even
         * cares for this information, and if the extent is at least
         * the last in its list.
         *
         * If those hold true, then the extent is last if any of the
         * additional conditions hold true:
         *  - Extent list is in-inode
         *  - Extent list is right-most
         *  - Extent list is 2nd to rightmost, with empty right-most
         */
        if (is_last) {
                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
                        if (tree_height == 0)
                                *is_last = 1;
                        else if (eb->h_blkno == di->i_last_eb_blk)
                                *is_last = 1;
                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
                                ret = ocfs2_last_eb_is_empty(inode, di);
                                if (ret < 0) {
                                        mlog_errno(ret);
                                        goto out;
                                }
                                if (ret == 1)
                                        *is_last = 1;
                        }
                }
        }

out_hole:
        ret = 0;
out:
        brelse(eb_bh);
        return ret;
}

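/*
 * Translate v_cluster into the physical cluster it maps to within
 * rec, and the number of clusters remaining in rec from that offset
 * onward.
 */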
static void ocfs2_relative_extent_offsets(struct super_block *sb,
                                          u32 v_cluster,
                                          struct ocfs2_extent_rec *rec,
                                          u32 *p_cluster, u32 *num_clusters)
{
        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
        *p_cluster = *p_cluster + coff;

        if (num_clusters)
                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}

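/*
 * Like ocfs2_get_clusters(), but for an xattr extent list.  Xattr
 * trees have no in-memory extent map, so every lookup walks the
 * on-disk list, and a missing record is treated as corruption rather
 * than a hole.
 */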
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
                             u32 *p_cluster, u32 *num_clusters,
                             struct ocfs2_extent_list *el,
                             unsigned int *extent_flags)
{
        int ret = 0, i;
        struct buffer_head *eb_bh = NULL;
        struct ocfs2_extent_block *eb;
        struct ocfs2_extent_rec *rec;
        u32 coff;

        if (el->l_tree_depth) {
                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
                                      &eb_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
                el = &eb->h_list;

                if (el->l_tree_depth) {
                        ocfs2_error(inode->i_sb,
                                    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
                                    inode->i_ino,
                                    (unsigned long long)eb_bh->b_blocknr);
                        ret = -EROFS;
                        goto out;
                }
        }

        i = ocfs2_search_extent_list(el, v_cluster);
        if (i == -1) {
                ret = -EROFS;
                mlog_errno(ret);
                goto out;
        } else {
                rec = &el->l_recs[i];
                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

                if (!rec->e_blkno) {
                        ocfs2_error(inode->i_sb,
                                    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
                                    inode->i_ino,
                                    le32_to_cpu(rec->e_cpos),
                                    ocfs2_rec_clusters(el, rec));
                        ret = -EROFS;
                        goto out;
                }
                coff = v_cluster - le32_to_cpu(rec->e_cpos);
                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
                                                    le64_to_cpu(rec->e_blkno));
                *p_cluster = *p_cluster + coff;
                if (num_clusters)
                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;

                if (extent_flags)
                        *extent_flags = rec->e_flags;
        }
out:
        brelse(eb_bh);
        return ret;
}

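/*
 * Map v_cluster to a physical cluster, consulting the in-memory
 * extent map first and falling back to an on-disk tree walk on a
 * miss (the result of which is cached for next time).  *p_cluster is
 * 0 for a hole, with *num_clusters carrying the hole length.  A
 * typical call (a sketch, not lifted from any in-tree caller) looks
 * like:
 *
 *        u32 phys, len;
 *        unsigned int flags;
 *
 *        ret = ocfs2_get_clusters(inode, cpos, &phys, &len, &flags);
 *        if (!ret && phys)
 *                ... cpos maps to phys for the next len clusters ...
 */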
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
                       u32 *p_cluster, u32 *num_clusters,
                       unsigned int *extent_flags)
{
        int ret;
        unsigned int hole_len, flags = 0;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                ret = -ERANGE;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
                                      num_clusters, extent_flags);
        if (ret == 0)
                goto out;

        ret = ocfs2_read_inode_block(inode, &di_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
                                         &rec, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        if (rec.e_blkno == 0ULL) {
                /*
                 * A hole was found. Return some canned values that
                 * callers can key on. If asked for, num_clusters will
                 * be populated with the size of the hole.
                 */
                *p_cluster = 0;
                if (num_clusters) {
                        *num_clusters = hole_len;
                }
        } else {
                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
                                              p_cluster, num_clusters);
                flags = rec.e_flags;

                ocfs2_extent_map_insert_rec(inode, &rec);
        }

        if (extent_flags)
                *extent_flags = flags;

out:
        brelse(di_bh);
        return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
                                u64 *ret_count, unsigned int *extent_flags)
{
        int ret;
        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
        u32 cpos, num_clusters, p_cluster;
        u64 boff = 0;

        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
                                 extent_flags);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * p_cluster == 0 indicates a hole.
         */
        if (p_cluster) {
                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
                boff += (v_blkno & (u64)(bpc - 1));
        }

        *p_blkno = boff;

        if (ret_count) {
                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
                *ret_count -= v_blkno & (u64)(bpc - 1);
        }

out:
        return ret;
}

/*
 * The name ocfs2_fiemap_inline() is slightly misleading: it handles
 * fiemap not only for inline-data files but also for fast symlinks,
 * since the two are identical as far as extent mapping is concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
                               struct fiemap_extent_info *fieinfo,
                               u64 map_start)
{
        int ret;
        unsigned int id_count;
        struct ocfs2_dinode *di;
        u64 phys;
        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);

        di = (struct ocfs2_dinode *)di_bh->b_data;
        if (ocfs2_inode_is_fast_symlink(inode))
                id_count = ocfs2_fast_symlink_chars(inode->i_sb);
        else
                id_count = le16_to_cpu(di->id2.i_data.id_count);

        if (map_start < id_count) {
                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
                if (ocfs2_inode_is_fast_symlink(inode))
                        phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
                else
                        phys += offsetof(struct ocfs2_dinode,
                                         id2.i_data.id_data);

                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
                                              flags);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 u64 map_start, u64 map_len)
{
        int ret, is_last;
        u32 mapping_end, cpos;
        unsigned int hole_size;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        u64 len_bytes, phys_bytes, virt_bytes;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;

        ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
        if (ret)
                return ret;

        ret = ocfs2_inode_lock(inode, &di_bh, 0);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        down_read(&OCFS2_I(inode)->ip_alloc_sem);

        /*
         * Handle inline-data and fast symlink separately.
         */
        if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
            ocfs2_inode_is_fast_symlink(inode)) {
                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
                goto out_unlock;
        }

        cpos = map_start >> osb->s_clustersize_bits;
        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
                                               map_start + map_len);
        is_last = 0;
        while (cpos < mapping_end && !is_last) {
                u32 fe_flags;

                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
                                                 &hole_size, &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
                        goto out_unlock;
                }

                if (rec.e_blkno == 0ULL) {
                        cpos += hole_size;
                        continue;
                }

                fe_flags = 0;
                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
                        fe_flags |= FIEMAP_EXTENT_SHARED;
                if (is_last)
                        fe_flags |= FIEMAP_EXTENT_LAST;
                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
                                              len_bytes, fe_flags);
                if (ret)
                        break;

                cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
        }

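        /*
         * fiemap_fill_next_extent() returns 1 when no more extents
         * will be accepted (the user array is full, or the
         * FIEMAP_EXTENT_LAST extent was just added), so a positive
         * return simply ends the walk and is not an error.
         */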
        if (ret > 0)
                ret = 0;

out_unlock:
        brelse(di_bh);

        up_read(&OCFS2_I(inode)->ip_alloc_sem);

        ocfs2_inode_unlock(inode, 0);
out:
        return ret;
}

/*
 * Is IO overwriting allocated blocks?  Returns 0 when the whole
 * [map_start, map_start + map_len) range is backed by allocated,
 * non-refcounted extents, and -EAGAIN when any part of it is a hole
 * or shared and would need allocation or copy-on-write first.
 */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
                       u64 map_start, u64 map_len)
{
        int ret = 0, is_last;
        u32 mapping_end, cpos;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_extent_rec rec;

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
                        return ret;
                else
                        return -EAGAIN;
        }

        cpos = map_start >> osb->s_clustersize_bits;
        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
                                               map_start + map_len);
        is_last = 0;
        while (cpos < mapping_end && !is_last) {
                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
                                                 NULL, &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                if (rec.e_blkno == 0ULL)
                        break;

                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
                        break;

                cpos = le32_to_cpu(rec.e_cpos) +
                        le16_to_cpu(rec.e_leaf_clusters);
        }

        if (cpos < mapping_end)
                ret = -EAGAIN;
out:
        return ret;
}

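/*
 * Back end for SEEK_DATA/SEEK_HOLE.  Walks the allocation from
 * *offset, treating unwritten extents as holes, and points *offset at
 * the start of the first matching region.  Returns -ENXIO when
 * *offset is past i_size or when SEEK_DATA finds no more data.
 */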
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
        struct inode *inode = file->f_mapping->host;
        int ret;
        unsigned int is_last = 0, is_data = 0;
        u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
        u32 cpos, cend, clen, hole_size;
        u64 extoff, extlen;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_extent_rec rec;

        BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

        ret = ocfs2_inode_lock(inode, &di_bh, 0);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        down_read(&OCFS2_I(inode)->ip_alloc_sem);

        if (*offset >= i_size_read(inode)) {
                ret = -ENXIO;
                goto out_unlock;
        }

        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                if (whence == SEEK_HOLE)
                        *offset = i_size_read(inode);
                goto out_unlock;
        }

        clen = 0;
        cpos = *offset >> cs_bits;
        cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

        while (cpos < cend && !is_last) {
                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
                                                 &rec, &is_last);
                if (ret) {
                        mlog_errno(ret);
                        goto out_unlock;
                }

                extoff = cpos;
                extoff <<= cs_bits;

                if (rec.e_blkno == 0ULL) {
                        clen = hole_size;
                        is_data = 0;
                } else {
                        clen = le16_to_cpu(rec.e_leaf_clusters) -
                                (cpos - le32_to_cpu(rec.e_cpos));
                        is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
                }

                if ((!is_data && whence == SEEK_HOLE) ||
                    (is_data && whence == SEEK_DATA)) {
                        if (extoff > *offset)
                                *offset = extoff;
                        goto out_unlock;
                }

                if (!is_last)
                        cpos += clen;
        }

        if (whence == SEEK_HOLE) {
                extoff = cpos;
                extoff <<= cs_bits;
                extlen = clen;
                extlen <<= cs_bits;

                if ((extoff + extlen) > i_size_read(inode))
                        extlen = i_size_read(inode) - extoff;
                extoff += extlen;
                if (extoff > *offset)
                        *offset = extoff;
                goto out_unlock;
        }

        ret = -ENXIO;

out_unlock:
        brelse(di_bh);

        up_read(&OCFS2_I(inode)->ip_alloc_sem);

        ocfs2_inode_unlock(inode, 0);
out:
        return ret;
}

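/*
 * Read nr blocks of the file starting at virtual block v_block,
 * translating each physically contiguous run through the extent map
 * before handing it to ocfs2_read_blocks().  A hole in the middle of
 * the range is an error; a read entirely past i_size is only legal
 * for readahead.
 */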
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
                           struct buffer_head *bhs[], int flags,
                           int (*validate)(struct super_block *sb,
                                           struct buffer_head *bh))
{
        int rc = 0;
        u64 p_block, p_count;
        int i, count, done = 0;

        trace_ocfs2_read_virt_blocks(
             inode, (unsigned long long)v_block, nr, bhs, flags,
             validate);

        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
            i_size_read(inode)) {
                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
                goto out;
        }

        while (done < nr) {
                down_read(&OCFS2_I(inode)->ip_alloc_sem);
                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
                                                 &p_block, &p_count, NULL);
                up_read(&OCFS2_I(inode)->ip_alloc_sem);
                if (rc) {
                        mlog_errno(rc);
                        break;
                }

                if (!p_block) {
                        rc = -EIO;
                        mlog(ML_ERROR,
                             "Inode #%llu contains a hole at offset %llu\n",
                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
                             (unsigned long long)(v_block + done) <<
                             inode->i_sb->s_blocksize_bits);
                        break;
                }

                count = nr - done;
                if (p_count < count)
                        count = p_count;

                /*
                 * If the caller passed us bhs, they should have come
                 * from a previous readahead call to this function.  Thus,
                 * they should have the right b_blocknr.
                 */
                for (i = 0; i < count; i++) {
                        if (!bhs[done + i])
                                continue;
                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
                }

                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
                                       bhs + done, flags, validate);
                if (rc) {
                        mlog_errno(rc);
                        break;
                }
                done += count;
        }

out:
        return rc;
}