linux/fs/ocfs2/extent_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * extent_map.c
   5 *
   6 * Block/Cluster mapping functions
   7 *
   8 * Copyright (C) 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License, version 2,  as published by the Free Software Foundation.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public
  20 * License along with this program; if not, write to the
  21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   22 * Boston, MA 02111-1307, USA.
  23 */
  24
  25#include <linux/fs.h>
  26#include <linux/init.h>
  27#include <linux/slab.h>
  28#include <linux/types.h>
  29#include <linux/fiemap.h>
  30
  31#include <cluster/masklog.h>
  32
  33#include "ocfs2.h"
  34
  35#include "alloc.h"
  36#include "dlmglue.h"
  37#include "extent_map.h"
  38#include "inode.h"
  39#include "super.h"
  40#include "symlink.h"
  41#include "ocfs2_trace.h"
  42
  43#include "buffer_head_io.h"
  44
  45/*
  46 * The extent caching implementation is intentionally trivial.
  47 *
  48 * We only cache a small number of extents stored directly on the
  49 * inode, so linear order operations are acceptable. If we ever want
  50 * to increase the size of the extent map, then these algorithms must
  51 * get smarter.
  52 */
  53
  54void ocfs2_extent_map_init(struct inode *inode)
  55{
  56        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  57
  58        oi->ip_extent_map.em_num_items = 0;
  59        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  60}
  61
  62static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  63                                      unsigned int cpos,
  64                                      struct ocfs2_extent_map_item **ret_emi)
  65{
  66        unsigned int range;
  67        struct ocfs2_extent_map_item *emi;
  68
  69        *ret_emi = NULL;
  70
  71        list_for_each_entry(emi, &em->em_list, ei_list) {
  72                range = emi->ei_cpos + emi->ei_clusters;
  73
  74                if (cpos >= emi->ei_cpos && cpos < range) {
  75                        list_move(&emi->ei_list, &em->em_list);
  76
  77                        *ret_emi = emi;
  78                        break;
  79                }
  80        }
  81}
  82
  83static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  84                                   unsigned int *phys, unsigned int *len,
  85                                   unsigned int *flags)
  86{
  87        unsigned int coff;
  88        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  89        struct ocfs2_extent_map_item *emi;
  90
  91        spin_lock(&oi->ip_lock);
  92
  93        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  94        if (emi) {
  95                coff = cpos - emi->ei_cpos;
  96                *phys = emi->ei_phys + coff;
  97                if (len)
  98                        *len = emi->ei_clusters - coff;
  99                if (flags)
 100                        *flags = emi->ei_flags;
 101        }
 102
 103        spin_unlock(&oi->ip_lock);
 104
 105        if (emi == NULL)
 106                return -ENOENT;
 107
 108        return 0;
 109}
 110
 111/*
 112 * Forget about all clusters equal to or greater than cpos.
 113 */
 114void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 115{
 116        struct ocfs2_extent_map_item *emi, *n;
 117        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 118        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 119        LIST_HEAD(tmp_list);
 120        unsigned int range;
 121
 122        spin_lock(&oi->ip_lock);
 123        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 124                if (emi->ei_cpos >= cpos) {
 125                        /* Full truncate of this record. */
 126                        list_move(&emi->ei_list, &tmp_list);
 127                        BUG_ON(em->em_num_items == 0);
 128                        em->em_num_items--;
 129                        continue;
 130                }
 131
 132                range = emi->ei_cpos + emi->ei_clusters;
 133                if (range > cpos) {
 134                        /* Partial truncate */
 135                        emi->ei_clusters = cpos - emi->ei_cpos;
 136                }
 137        }
 138        spin_unlock(&oi->ip_lock);
 139
 140        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 141                list_del(&emi->ei_list);
 142                kfree(emi);
 143        }
 144}
 145
 146/*
 147 * Is any part of emi2 contained within emi1
 148 */
 149static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 150                                 struct ocfs2_extent_map_item *emi2)
 151{
 152        unsigned int range1, range2;
 153
 154        /*
 155         * Check if logical start of emi2 is inside emi1
 156         */
 157        range1 = emi1->ei_cpos + emi1->ei_clusters;
 158        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 159                return 1;
 160
 161        /*
 162         * Check if logical end of emi2 is inside emi1
 163         */
 164        range2 = emi2->ei_cpos + emi2->ei_clusters;
 165        if (range2 > emi1->ei_cpos && range2 <= range1)
 166                return 1;
 167
 168        return 0;
 169}
 170
 171static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 172                                  struct ocfs2_extent_map_item *src)
 173{
 174        dest->ei_cpos = src->ei_cpos;
 175        dest->ei_phys = src->ei_phys;
 176        dest->ei_clusters = src->ei_clusters;
 177        dest->ei_flags = src->ei_flags;
 178}
 179
 180/*
 181 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 182 * otherwise.
 183 */
 184static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 185                                         struct ocfs2_extent_map_item *ins)
 186{
 187        /*
 188         * Handle contiguousness
 189         */
 190        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 191            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 192            ins->ei_flags == emi->ei_flags) {
 193                emi->ei_clusters += ins->ei_clusters;
 194                return 1;
 195        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 196                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 197                   ins->ei_flags == emi->ei_flags) {
 198                emi->ei_phys = ins->ei_phys;
 199                emi->ei_cpos = ins->ei_cpos;
 200                emi->ei_clusters += ins->ei_clusters;
 201                return 1;
 202        }
 203
 204        /*
 205         * Overlapping extents - this shouldn't happen unless we've
 206         * split an extent to change it's flags. That is exceedingly
 207         * rare, so there's no sense in trying to optimize it yet.
 208         */
 209        if (ocfs2_ei_is_contained(emi, ins) ||
 210            ocfs2_ei_is_contained(ins, emi)) {
 211                ocfs2_copy_emi_fields(emi, ins);
 212                return 1;
 213        }
 214
 215        /* No merge was possible. */
 216        return 0;
 217}
 218
 219/*
 220 * In order to reduce complexity on the caller, this insert function
 221 * is intentionally liberal in what it will accept.
 222 *
 223 * The only rule is that the truncate call *must* be used whenever
 224 * records have been deleted. This avoids inserting overlapping
 225 * records with different physical mappings.
 226 */
 227void ocfs2_extent_map_insert_rec(struct inode *inode,
 228                                 struct ocfs2_extent_rec *rec)
 229{
 230        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 231        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 232        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
 233        struct ocfs2_extent_map_item ins;
 234
 235        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
 236        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
 237                                               le64_to_cpu(rec->e_blkno));
 238        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
 239        ins.ei_flags = rec->e_flags;
 240
 241search:
 242        spin_lock(&oi->ip_lock);
 243
 244        list_for_each_entry(emi, &em->em_list, ei_list) {
 245                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
 246                        list_move(&emi->ei_list, &em->em_list);
 247                        spin_unlock(&oi->ip_lock);
 248                        goto out;
 249                }
 250        }
 251
 252        /*
 253         * No item could be merged.
 254         *
 255         * Either allocate and add a new item, or overwrite the last recently
 256         * inserted.
 257         */
 258
 259        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
 260                if (new_emi == NULL) {
 261                        spin_unlock(&oi->ip_lock);
 262
 263                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
 264                        if (new_emi == NULL)
 265                                goto out;
 266
 267                        goto search;
 268                }
 269
 270                ocfs2_copy_emi_fields(new_emi, &ins);
 271                list_add(&new_emi->ei_list, &em->em_list);
 272                em->em_num_items++;
 273                new_emi = NULL;
 274        } else {
 275                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
 276                emi = list_entry(em->em_list.prev,
 277                                 struct ocfs2_extent_map_item, ei_list);
 278                list_move(&emi->ei_list, &em->em_list);
 279                ocfs2_copy_emi_fields(emi, &ins);
 280        }
 281
 282        spin_unlock(&oi->ip_lock);
 283
 284out:
 285        kfree(new_emi);
 286}
 287
 288static int ocfs2_last_eb_is_empty(struct inode *inode,
 289                                  struct ocfs2_dinode *di)
 290{
 291        int ret, next_free;
 292        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
 293        struct buffer_head *eb_bh = NULL;
 294        struct ocfs2_extent_block *eb;
 295        struct ocfs2_extent_list *el;
 296
 297        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 298        if (ret) {
 299                mlog_errno(ret);
 300                goto out;
 301        }
 302
 303        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 304        el = &eb->h_list;
 305
 306        if (el->l_tree_depth) {
 307                ocfs2_error(inode->i_sb,
 308                            "Inode %lu has non zero tree depth in leaf block %llu\n",
 309                            inode->i_ino,
 310                            (unsigned long long)eb_bh->b_blocknr);
 311                ret = -EROFS;
 312                goto out;
 313        }
 314
 315        next_free = le16_to_cpu(el->l_next_free_rec);
 316
 317        if (next_free == 0 ||
 318            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
 319                ret = 1;
 320
 321out:
 322        brelse(eb_bh);
 323        return ret;
 324}
 325
 326/*
 327 * Return the 1st index within el which contains an extent start
 328 * larger than v_cluster.
 329 */
 330static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 331                                       u32 v_cluster)
 332{
 333        int i;
 334        struct ocfs2_extent_rec *rec;
 335
 336        for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 337                rec = &el->l_recs[i];
 338
 339                if (v_cluster < le32_to_cpu(rec->e_cpos))
 340                        break;
 341        }
 342
 343        return i;
 344}
 345
 346/*
 347 * Figure out the size of a hole which starts at v_cluster within the given
 348 * extent list.
 349 *
 350 * If there is no more allocation past v_cluster, we return the maximum
 351 * cluster size minus v_cluster.
 352 *
 353 * If we have in-inode extents, then el points to the dinode list and
 354 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 355 * containing el.
 356 */
 357int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
 358                               struct ocfs2_extent_list *el,
 359                               struct buffer_head *eb_bh,
 360                               u32 v_cluster,
 361                               u32 *num_clusters)
 362{
 363        int ret, i;
 364        struct buffer_head *next_eb_bh = NULL;
 365        struct ocfs2_extent_block *eb, *next_eb;
 366
 367        i = ocfs2_search_for_hole_index(el, v_cluster);
 368
 369        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
 370                eb = (struct ocfs2_extent_block *)eb_bh->b_data;
 371
 372                /*
 373                 * Check the next leaf for any extents.
 374                 */
 375
 376                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 377                        goto no_more_extents;
 378
 379                ret = ocfs2_read_extent_block(ci,
 380                                              le64_to_cpu(eb->h_next_leaf_blk),
 381                                              &next_eb_bh);
 382                if (ret) {
 383                        mlog_errno(ret);
 384                        goto out;
 385                }
 386
 387                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 388                el = &next_eb->h_list;
 389                i = ocfs2_search_for_hole_index(el, v_cluster);
 390        }
 391
 392no_more_extents:
 393        if (i == le16_to_cpu(el->l_next_free_rec)) {
 394                /*
 395                 * We're at the end of our existing allocation. Just
 396                 * return the maximum number of clusters we could
 397                 * possibly allocate.
 398                 */
 399                *num_clusters = UINT_MAX - v_cluster;
 400        } else {
 401                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
 402        }
 403
 404        ret = 0;
 405out:
 406        brelse(next_eb_bh);
 407        return ret;
 408}
 409
 410static int ocfs2_get_clusters_nocache(struct inode *inode,
 411                                      struct buffer_head *di_bh,
 412                                      u32 v_cluster, unsigned int *hole_len,
 413                                      struct ocfs2_extent_rec *ret_rec,
 414                                      unsigned int *is_last)
 415{
 416        int i, ret, tree_height, len;
 417        struct ocfs2_dinode *di;
 418        struct ocfs2_extent_block *uninitialized_var(eb);
 419        struct ocfs2_extent_list *el;
 420        struct ocfs2_extent_rec *rec;
 421        struct buffer_head *eb_bh = NULL;
 422
 423        memset(ret_rec, 0, sizeof(*ret_rec));
 424        if (is_last)
 425                *is_last = 0;
 426
 427        di = (struct ocfs2_dinode *) di_bh->b_data;
 428        el = &di->id2.i_list;
 429        tree_height = le16_to_cpu(el->l_tree_depth);
 430
 431        if (tree_height > 0) {
 432                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 433                                      &eb_bh);
 434                if (ret) {
 435                        mlog_errno(ret);
 436                        goto out;
 437                }
 438
 439                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 440                el = &eb->h_list;
 441
 442                if (el->l_tree_depth) {
 443                        ocfs2_error(inode->i_sb,
 444                                    "Inode %lu has non zero tree depth in leaf block %llu\n",
 445                                    inode->i_ino,
 446                                    (unsigned long long)eb_bh->b_blocknr);
 447                        ret = -EROFS;
 448                        goto out;
 449                }
 450        }
 451
 452        i = ocfs2_search_extent_list(el, v_cluster);
 453        if (i == -1) {
 454                /*
 455                 * Holes can be larger than the maximum size of an
 456                 * extent, so we return their lengths in a separate
 457                 * field.
 458                 */
 459                if (hole_len) {
 460                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
 461                                                         el, eb_bh,
 462                                                         v_cluster, &len);
 463                        if (ret) {
 464                                mlog_errno(ret);
 465                                goto out;
 466                        }
 467
 468                        *hole_len = len;
 469                }
 470                goto out_hole;
 471        }
 472
 473        rec = &el->l_recs[i];
 474
 475        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 476
 477        if (!rec->e_blkno) {
 478                ocfs2_error(inode->i_sb,
 479                            "Inode %lu has bad extent record (%u, %u, 0)\n",
 480                            inode->i_ino,
 481                            le32_to_cpu(rec->e_cpos),
 482                            ocfs2_rec_clusters(el, rec));
 483                ret = -EROFS;
 484                goto out;
 485        }
 486
 487        *ret_rec = *rec;
 488
 489        /*
 490         * Checking for last extent is potentially expensive - we
 491         * might have to look at the next leaf over to see if it's
 492         * empty.
 493         *
 494         * The first two checks are to see whether the caller even
 495         * cares for this information, and if the extent is at least
 496         * the last in it's list.
 497         *
 498         * If those hold true, then the extent is last if any of the
 499         * additional conditions hold true:
 500         *  - Extent list is in-inode
 501         *  - Extent list is right-most
 502         *  - Extent list is 2nd to rightmost, with empty right-most
 503         */
 504        if (is_last) {
 505                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
 506                        if (tree_height == 0)
 507                                *is_last = 1;
 508                        else if (eb->h_blkno == di->i_last_eb_blk)
 509                                *is_last = 1;
 510                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
 511                                ret = ocfs2_last_eb_is_empty(inode, di);
 512                                if (ret < 0) {
 513                                        mlog_errno(ret);
 514                                        goto out;
 515                                }
 516                                if (ret == 1)
 517                                        *is_last = 1;
 518                        }
 519                }
 520        }
 521
 522out_hole:
 523        ret = 0;
 524out:
 525        brelse(eb_bh);
 526        return ret;
 527}
 528
 529static void ocfs2_relative_extent_offsets(struct super_block *sb,
 530                                          u32 v_cluster,
 531                                          struct ocfs2_extent_rec *rec,
 532                                          u32 *p_cluster, u32 *num_clusters)
 533
 534{
 535        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 536
 537        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 538        *p_cluster = *p_cluster + coff;
 539
 540        if (num_clusters)
 541                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 542}
 543
 544int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 545                             u32 *p_cluster, u32 *num_clusters,
 546                             struct ocfs2_extent_list *el,
 547                             unsigned int *extent_flags)
 548{
 549        int ret = 0, i;
 550        struct buffer_head *eb_bh = NULL;
 551        struct ocfs2_extent_block *eb;
 552        struct ocfs2_extent_rec *rec;
 553        u32 coff;
 554
 555        if (el->l_tree_depth) {
 556                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 557                                      &eb_bh);
 558                if (ret) {
 559                        mlog_errno(ret);
 560                        goto out;
 561                }
 562
 563                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 564                el = &eb->h_list;
 565
 566                if (el->l_tree_depth) {
 567                        ocfs2_error(inode->i_sb,
 568                                    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
 569                                    inode->i_ino,
 570                                    (unsigned long long)eb_bh->b_blocknr);
 571                        ret = -EROFS;
 572                        goto out;
 573                }
 574        }
 575
 576        i = ocfs2_search_extent_list(el, v_cluster);
 577        if (i == -1) {
 578                ret = -EROFS;
 579                mlog_errno(ret);
 580                goto out;
 581        } else {
 582                rec = &el->l_recs[i];
 583                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 584
 585                if (!rec->e_blkno) {
 586                        ocfs2_error(inode->i_sb,
 587                                    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
 588                                    inode->i_ino,
 589                                    le32_to_cpu(rec->e_cpos),
 590                                    ocfs2_rec_clusters(el, rec));
 591                        ret = -EROFS;
 592                        goto out;
 593                }
 594                coff = v_cluster - le32_to_cpu(rec->e_cpos);
 595                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 596                                                    le64_to_cpu(rec->e_blkno));
 597                *p_cluster = *p_cluster + coff;
 598                if (num_clusters)
 599                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 600
 601                if (extent_flags)
 602                        *extent_flags = rec->e_flags;
 603        }
 604out:
 605        if (eb_bh)
 606                brelse(eb_bh);
 607        return ret;
 608}
 609
 610int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 611                       u32 *p_cluster, u32 *num_clusters,
 612                       unsigned int *extent_flags)
 613{
 614        int ret;
 615        unsigned int uninitialized_var(hole_len), flags = 0;
 616        struct buffer_head *di_bh = NULL;
 617        struct ocfs2_extent_rec rec;
 618
 619        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 620                ret = -ERANGE;
 621                mlog_errno(ret);
 622                goto out;
 623        }
 624
 625        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
 626                                      num_clusters, extent_flags);
 627        if (ret == 0)
 628                goto out;
 629
 630        ret = ocfs2_read_inode_block(inode, &di_bh);
 631        if (ret) {
 632                mlog_errno(ret);
 633                goto out;
 634        }
 635
 636        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
 637                                         &rec, NULL);
 638        if (ret) {
 639                mlog_errno(ret);
 640                goto out;
 641        }
 642
 643        if (rec.e_blkno == 0ULL) {
 644                /*
 645                 * A hole was found. Return some canned values that
 646                 * callers can key on. If asked for, num_clusters will
 647                 * be populated with the size of the hole.
 648                 */
 649                *p_cluster = 0;
 650                if (num_clusters) {
 651                        *num_clusters = hole_len;
 652                }
 653        } else {
 654                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
 655                                              p_cluster, num_clusters);
 656                flags = rec.e_flags;
 657
 658                ocfs2_extent_map_insert_rec(inode, &rec);
 659        }
 660
 661        if (extent_flags)
 662                *extent_flags = flags;
 663
 664out:
 665        brelse(di_bh);
 666        return ret;
 667}
 668
 669/*
 670 * This expects alloc_sem to be held. The allocation cannot change at
 671 * all while the map is in the process of being updated.
 672 */
 673int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 674                                u64 *ret_count, unsigned int *extent_flags)
 675{
 676        int ret;
 677        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 678        u32 cpos, num_clusters, p_cluster;
 679        u64 boff = 0;
 680
 681        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 682
 683        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 684                                 extent_flags);
 685        if (ret) {
 686                mlog_errno(ret);
 687                goto out;
 688        }
 689
 690        /*
 691         * p_cluster == 0 indicates a hole.
 692         */
 693        if (p_cluster) {
 694                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 695                boff += (v_blkno & (u64)(bpc - 1));
 696        }
 697
 698        *p_blkno = boff;
 699
 700        if (ret_count) {
 701                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 702                *ret_count -= v_blkno & (u64)(bpc - 1);
 703        }
 704
 705out:
 706        return ret;
 707}
 708
 709/*
 710 * The ocfs2_fiemap_inline() may be a little bit misleading, since
 711 * it not only handles the fiemap for inlined files, but also deals
 712 * with the fast symlink, cause they have no difference for extent
 713 * mapping per se.
 714 */
 715static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 716                               struct fiemap_extent_info *fieinfo,
 717                               u64 map_start)
 718{
 719        int ret;
 720        unsigned int id_count;
 721        struct ocfs2_dinode *di;
 722        u64 phys;
 723        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
 724        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 725
 726        di = (struct ocfs2_dinode *)di_bh->b_data;
 727        if (ocfs2_inode_is_fast_symlink(inode))
 728                id_count = ocfs2_fast_symlink_chars(inode->i_sb);
 729        else
 730                id_count = le16_to_cpu(di->id2.i_data.id_count);
 731
 732        if (map_start < id_count) {
 733                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
 734                if (ocfs2_inode_is_fast_symlink(inode))
 735                        phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
 736                else
 737                        phys += offsetof(struct ocfs2_dinode,
 738                                         id2.i_data.id_data);
 739
 740                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
 741                                              flags);
 742                if (ret < 0)
 743                        return ret;
 744        }
 745
 746        return 0;
 747}
 748
 749#define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
 750
 751int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 752                 u64 map_start, u64 map_len)
 753{
 754        int ret, is_last;
 755        u32 mapping_end, cpos;
 756        unsigned int hole_size;
 757        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 758        u64 len_bytes, phys_bytes, virt_bytes;
 759        struct buffer_head *di_bh = NULL;
 760        struct ocfs2_extent_rec rec;
 761
 762        ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
 763        if (ret)
 764                return ret;
 765
 766        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 767        if (ret) {
 768                mlog_errno(ret);
 769                goto out;
 770        }
 771
 772        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 773
 774        /*
 775         * Handle inline-data and fast symlink separately.
 776         */
 777        if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
 778            ocfs2_inode_is_fast_symlink(inode)) {
 779                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
 780                goto out_unlock;
 781        }
 782
 783        cpos = map_start >> osb->s_clustersize_bits;
 784        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 785                                               map_start + map_len);
 786        is_last = 0;
 787        while (cpos < mapping_end && !is_last) {
 788                u32 fe_flags;
 789
 790                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 791                                                 &hole_size, &rec, &is_last);
 792                if (ret) {
 793                        mlog_errno(ret);
 794                        goto out_unlock;
 795                }
 796
 797                if (rec.e_blkno == 0ULL) {
 798                        cpos += hole_size;
 799                        continue;
 800                }
 801
 802                fe_flags = 0;
 803                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
 804                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
 805                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 806                        fe_flags |= FIEMAP_EXTENT_SHARED;
 807                if (is_last)
 808                        fe_flags |= FIEMAP_EXTENT_LAST;
 809                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
 810                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
 811                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
 812
 813                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
 814                                              len_bytes, fe_flags);
 815                if (ret)
 816                        break;
 817
 818                cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
 819        }
 820
 821        if (ret > 0)
 822                ret = 0;
 823
 824out_unlock:
 825        brelse(di_bh);
 826
 827        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 828
 829        ocfs2_inode_unlock(inode, 0);
 830out:
 831
 832        return ret;
 833}
 834
 835int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 836{
 837        struct inode *inode = file->f_mapping->host;
 838        int ret;
 839        unsigned int is_last = 0, is_data = 0;
 840        u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
 841        u32 cpos, cend, clen, hole_size;
 842        u64 extoff, extlen;
 843        struct buffer_head *di_bh = NULL;
 844        struct ocfs2_extent_rec rec;
 845
 846        BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 847
 848        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 849        if (ret) {
 850                mlog_errno(ret);
 851                goto out;
 852        }
 853
 854        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 855
 856        if (*offset >= i_size_read(inode)) {
 857                ret = -ENXIO;
 858                goto out_unlock;
 859        }
 860
 861        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 862                if (whence == SEEK_HOLE)
 863                        *offset = i_size_read(inode);
 864                goto out_unlock;
 865        }
 866
 867        clen = 0;
 868        cpos = *offset >> cs_bits;
 869        cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 870
 871        while (cpos < cend && !is_last) {
 872                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
 873                                                 &rec, &is_last);
 874                if (ret) {
 875                        mlog_errno(ret);
 876                        goto out_unlock;
 877                }
 878
 879                extoff = cpos;
 880                extoff <<= cs_bits;
 881
 882                if (rec.e_blkno == 0ULL) {
 883                        clen = hole_size;
 884                        is_data = 0;
 885                } else {
 886                        clen = le16_to_cpu(rec.e_leaf_clusters) -
 887                                (cpos - le32_to_cpu(rec.e_cpos));
 888                        is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 889                }
 890
 891                if ((!is_data && whence == SEEK_HOLE) ||
 892                    (is_data && whence == SEEK_DATA)) {
 893                        if (extoff > *offset)
 894                                *offset = extoff;
 895                        goto out_unlock;
 896                }
 897
 898                if (!is_last)
 899                        cpos += clen;
 900        }
 901
 902        if (whence == SEEK_HOLE) {
 903                extoff = cpos;
 904                extoff <<= cs_bits;
 905                extlen = clen;
 906                extlen <<=  cs_bits;
 907
 908                if ((extoff + extlen) > i_size_read(inode))
 909                        extlen = i_size_read(inode) - extoff;
 910                extoff += extlen;
 911                if (extoff > *offset)
 912                        *offset = extoff;
 913                goto out_unlock;
 914        }
 915
 916        ret = -ENXIO;
 917
 918out_unlock:
 919
 920        brelse(di_bh);
 921
 922        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 923
 924        ocfs2_inode_unlock(inode, 0);
 925out:
 926        return ret;
 927}
 928
 929int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 930                           struct buffer_head *bhs[], int flags,
 931                           int (*validate)(struct super_block *sb,
 932                                           struct buffer_head *bh))
 933{
 934        int rc = 0;
 935        u64 p_block, p_count;
 936        int i, count, done = 0;
 937
 938        trace_ocfs2_read_virt_blocks(
 939             inode, (unsigned long long)v_block, nr, bhs, flags,
 940             validate);
 941
 942        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 943            i_size_read(inode)) {
 944                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 945                goto out;
 946        }
 947
 948        while (done < nr) {
 949                down_read(&OCFS2_I(inode)->ip_alloc_sem);
 950                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 951                                                 &p_block, &p_count, NULL);
 952                up_read(&OCFS2_I(inode)->ip_alloc_sem);
 953                if (rc) {
 954                        mlog_errno(rc);
 955                        break;
 956                }
 957
 958                if (!p_block) {
 959                        rc = -EIO;
 960                        mlog(ML_ERROR,
 961                             "Inode #%llu contains a hole at offset %llu\n",
 962                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
 963                             (unsigned long long)(v_block + done) <<
 964                             inode->i_sb->s_blocksize_bits);
 965                        break;
 966                }
 967
 968                count = nr - done;
 969                if (p_count < count)
 970                        count = p_count;
 971
 972                /*
 973                 * If the caller passed us bhs, they should have come
 974                 * from a previous readahead call to this function.  Thus,
 975                 * they should have the right b_blocknr.
 976                 */
 977                for (i = 0; i < count; i++) {
 978                        if (!bhs[done + i])
 979                                continue;
 980                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
 981                }
 982
 983                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
 984                                       bhs + done, flags, validate);
 985                if (rc) {
 986                        mlog_errno(rc);
 987                        break;
 988                }
 989                done += count;
 990        }
 991
 992out:
 993        return rc;
 994}
 995
 996
 997