linux/fs/ocfs2/extent_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * extent_map.c
   5 *
   6 * Block/Cluster mapping functions
   7 *
   8 * Copyright (C) 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License, version 2,  as published by the Free Software Foundation.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public
  20 * License along with this program; if not, write to the
  21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  22 * Boston, MA 021110-1307, USA.
  23 */
  24
  25#include <linux/fs.h>
  26#include <linux/init.h>
  27#include <linux/types.h>
  28#include <linux/fiemap.h>
  29
  30#define MLOG_MASK_PREFIX ML_EXTENT_MAP
  31#include <cluster/masklog.h>
  32
  33#include "ocfs2.h"
  34
  35#include "alloc.h"
  36#include "dlmglue.h"
  37#include "extent_map.h"
  38#include "inode.h"
  39#include "super.h"
  40
  41#include "buffer_head_io.h"
  42
  43/*
  44 * The extent caching implementation is intentionally trivial.
  45 *
  46 * We only cache a small number of extents stored directly on the
  47 * inode, so linear order operations are acceptable. If we ever want
  48 * to increase the size of the extent map, then these algorithms must
  49 * get smarter.
  50 */
  51
  52void ocfs2_extent_map_init(struct inode *inode)
  53{
  54        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  55
  56        oi->ip_extent_map.em_num_items = 0;
  57        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  58}
  59
  60static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  61                                      unsigned int cpos,
  62                                      struct ocfs2_extent_map_item **ret_emi)
  63{
  64        unsigned int range;
  65        struct ocfs2_extent_map_item *emi;
  66
  67        *ret_emi = NULL;
  68
  69        list_for_each_entry(emi, &em->em_list, ei_list) {
  70                range = emi->ei_cpos + emi->ei_clusters;
  71
  72                if (cpos >= emi->ei_cpos && cpos < range) {
  73                        list_move(&emi->ei_list, &em->em_list);
  74
  75                        *ret_emi = emi;
  76                        break;
  77                }
  78        }
  79}
  80
  81static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  82                                   unsigned int *phys, unsigned int *len,
  83                                   unsigned int *flags)
  84{
  85        unsigned int coff;
  86        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  87        struct ocfs2_extent_map_item *emi;
  88
  89        spin_lock(&oi->ip_lock);
  90
  91        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  92        if (emi) {
  93                coff = cpos - emi->ei_cpos;
  94                *phys = emi->ei_phys + coff;
  95                if (len)
  96                        *len = emi->ei_clusters - coff;
  97                if (flags)
  98                        *flags = emi->ei_flags;
  99        }
 100
 101        spin_unlock(&oi->ip_lock);
 102
 103        if (emi == NULL)
 104                return -ENOENT;
 105
 106        return 0;
 107}
 108
 109/*
 110 * Forget about all clusters equal to or greater than cpos.
 111 */
 112void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 113{
 114        struct ocfs2_extent_map_item *emi, *n;
 115        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 116        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 117        LIST_HEAD(tmp_list);
 118        unsigned int range;
 119
 120        spin_lock(&oi->ip_lock);
 121        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 122                if (emi->ei_cpos >= cpos) {
 123                        /* Full truncate of this record. */
 124                        list_move(&emi->ei_list, &tmp_list);
 125                        BUG_ON(em->em_num_items == 0);
 126                        em->em_num_items--;
 127                        continue;
 128                }
 129
 130                range = emi->ei_cpos + emi->ei_clusters;
 131                if (range > cpos) {
 132                        /* Partial truncate */
 133                        emi->ei_clusters = cpos - emi->ei_cpos;
 134                }
 135        }
 136        spin_unlock(&oi->ip_lock);
 137
 138        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 139                list_del(&emi->ei_list);
 140                kfree(emi);
 141        }
 142}
 143
 144/*
 145 * Is any part of emi2 contained within emi1
 146 */
 147static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 148                                 struct ocfs2_extent_map_item *emi2)
 149{
 150        unsigned int range1, range2;
 151
 152        /*
 153         * Check if logical start of emi2 is inside emi1
 154         */
 155        range1 = emi1->ei_cpos + emi1->ei_clusters;
 156        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 157                return 1;
 158
 159        /*
 160         * Check if logical end of emi2 is inside emi1
 161         */
 162        range2 = emi2->ei_cpos + emi2->ei_clusters;
 163        if (range2 > emi1->ei_cpos && range2 <= range1)
 164                return 1;
 165
 166        return 0;
 167}
 168
 169static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 170                                  struct ocfs2_extent_map_item *src)
 171{
 172        dest->ei_cpos = src->ei_cpos;
 173        dest->ei_phys = src->ei_phys;
 174        dest->ei_clusters = src->ei_clusters;
 175        dest->ei_flags = src->ei_flags;
 176}
 177
 178/*
 179 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 180 * otherwise.
 181 */
 182static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 183                                         struct ocfs2_extent_map_item *ins)
 184{
 185        /*
 186         * Handle contiguousness
 187         */
 188        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 189            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 190            ins->ei_flags == emi->ei_flags) {
 191                emi->ei_clusters += ins->ei_clusters;
 192                return 1;
 193        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 194                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys &&
 195                   ins->ei_flags == emi->ei_flags) {
 196                emi->ei_phys = ins->ei_phys;
 197                emi->ei_cpos = ins->ei_cpos;
 198                emi->ei_clusters += ins->ei_clusters;
 199                return 1;
 200        }
 201
 202        /*
 203         * Overlapping extents - this shouldn't happen unless we've
 204         * split an extent to change it's flags. That is exceedingly
 205         * rare, so there's no sense in trying to optimize it yet.
 206         */
 207        if (ocfs2_ei_is_contained(emi, ins) ||
 208            ocfs2_ei_is_contained(ins, emi)) {
 209                ocfs2_copy_emi_fields(emi, ins);
 210                return 1;
 211        }
 212
 213        /* No merge was possible. */
 214        return 0;
 215}
 216
 217/*
 218 * In order to reduce complexity on the caller, this insert function
 219 * is intentionally liberal in what it will accept.
 220 *
 221 * The only rule is that the truncate call *must* be used whenever
 222 * records have been deleted. This avoids inserting overlapping
 223 * records with different physical mappings.
 224 */
 225void ocfs2_extent_map_insert_rec(struct inode *inode,
 226                                 struct ocfs2_extent_rec *rec)
 227{
 228        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 229        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 230        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
 231        struct ocfs2_extent_map_item ins;
 232
 233        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
 234        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
 235                                               le64_to_cpu(rec->e_blkno));
 236        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
 237        ins.ei_flags = rec->e_flags;
 238
 239search:
 240        spin_lock(&oi->ip_lock);
 241
 242        list_for_each_entry(emi, &em->em_list, ei_list) {
 243                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
 244                        list_move(&emi->ei_list, &em->em_list);
 245                        spin_unlock(&oi->ip_lock);
 246                        goto out;
 247                }
 248        }
 249
 250        /*
 251         * No item could be merged.
 252         *
 253         * Either allocate and add a new item, or overwrite the last recently
 254         * inserted.
 255         */
 256
 257        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
 258                if (new_emi == NULL) {
 259                        spin_unlock(&oi->ip_lock);
 260
 261                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
 262                        if (new_emi == NULL)
 263                                goto out;
 264
 265                        goto search;
 266                }
 267
 268                ocfs2_copy_emi_fields(new_emi, &ins);
 269                list_add(&new_emi->ei_list, &em->em_list);
 270                em->em_num_items++;
 271                new_emi = NULL;
 272        } else {
 273                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
 274                emi = list_entry(em->em_list.prev,
 275                                 struct ocfs2_extent_map_item, ei_list);
 276                list_move(&emi->ei_list, &em->em_list);
 277                ocfs2_copy_emi_fields(emi, &ins);
 278        }
 279
 280        spin_unlock(&oi->ip_lock);
 281
 282out:
 283        if (new_emi)
 284                kfree(new_emi);
 285}
 286
 287static int ocfs2_last_eb_is_empty(struct inode *inode,
 288                                  struct ocfs2_dinode *di)
 289{
 290        int ret, next_free;
 291        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
 292        struct buffer_head *eb_bh = NULL;
 293        struct ocfs2_extent_block *eb;
 294        struct ocfs2_extent_list *el;
 295
 296        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 297        if (ret) {
 298                mlog_errno(ret);
 299                goto out;
 300        }
 301
 302        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 303        el = &eb->h_list;
 304
 305        if (el->l_tree_depth) {
 306                ocfs2_error(inode->i_sb,
 307                            "Inode %lu has non zero tree depth in "
 308                            "leaf block %llu\n", inode->i_ino,
 309                            (unsigned long long)eb_bh->b_blocknr);
 310                ret = -EROFS;
 311                goto out;
 312        }
 313
 314        next_free = le16_to_cpu(el->l_next_free_rec);
 315
 316        if (next_free == 0 ||
 317            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
 318                ret = 1;
 319
 320out:
 321        brelse(eb_bh);
 322        return ret;
 323}
 324
 325/*
 326 * Return the 1st index within el which contains an extent start
 327 * larger than v_cluster.
 328 */
 329static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 330                                       u32 v_cluster)
 331{
 332        int i;
 333        struct ocfs2_extent_rec *rec;
 334
 335        for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 336                rec = &el->l_recs[i];
 337
 338                if (v_cluster < le32_to_cpu(rec->e_cpos))
 339                        break;
 340        }
 341
 342        return i;
 343}
 344
 345/*
 346 * Figure out the size of a hole which starts at v_cluster within the given
 347 * extent list.
 348 *
 349 * If there is no more allocation past v_cluster, we return the maximum
 350 * cluster size minus v_cluster.
 351 *
 352 * If we have in-inode extents, then el points to the dinode list and
 353 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 354 * containing el.
 355 */
 356int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
 357                               struct ocfs2_extent_list *el,
 358                               struct buffer_head *eb_bh,
 359                               u32 v_cluster,
 360                               u32 *num_clusters)
 361{
 362        int ret, i;
 363        struct buffer_head *next_eb_bh = NULL;
 364        struct ocfs2_extent_block *eb, *next_eb;
 365
 366        i = ocfs2_search_for_hole_index(el, v_cluster);
 367
 368        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
 369                eb = (struct ocfs2_extent_block *)eb_bh->b_data;
 370
 371                /*
 372                 * Check the next leaf for any extents.
 373                 */
 374
 375                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 376                        goto no_more_extents;
 377
 378                ret = ocfs2_read_extent_block(ci,
 379                                              le64_to_cpu(eb->h_next_leaf_blk),
 380                                              &next_eb_bh);
 381                if (ret) {
 382                        mlog_errno(ret);
 383                        goto out;
 384                }
 385
 386                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 387                el = &next_eb->h_list;
 388                i = ocfs2_search_for_hole_index(el, v_cluster);
 389        }
 390
 391no_more_extents:
 392        if (i == le16_to_cpu(el->l_next_free_rec)) {
 393                /*
 394                 * We're at the end of our existing allocation. Just
 395                 * return the maximum number of clusters we could
 396                 * possibly allocate.
 397                 */
 398                *num_clusters = UINT_MAX - v_cluster;
 399        } else {
 400                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
 401        }
 402
 403        ret = 0;
 404out:
 405        brelse(next_eb_bh);
 406        return ret;
 407}
 408
 409static int ocfs2_get_clusters_nocache(struct inode *inode,
 410                                      struct buffer_head *di_bh,
 411                                      u32 v_cluster, unsigned int *hole_len,
 412                                      struct ocfs2_extent_rec *ret_rec,
 413                                      unsigned int *is_last)
 414{
 415        int i, ret, tree_height, len;
 416        struct ocfs2_dinode *di;
 417        struct ocfs2_extent_block *uninitialized_var(eb);
 418        struct ocfs2_extent_list *el;
 419        struct ocfs2_extent_rec *rec;
 420        struct buffer_head *eb_bh = NULL;
 421
 422        memset(ret_rec, 0, sizeof(*ret_rec));
 423        if (is_last)
 424                *is_last = 0;
 425
 426        di = (struct ocfs2_dinode *) di_bh->b_data;
 427        el = &di->id2.i_list;
 428        tree_height = le16_to_cpu(el->l_tree_depth);
 429
 430        if (tree_height > 0) {
 431                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 432                                      &eb_bh);
 433                if (ret) {
 434                        mlog_errno(ret);
 435                        goto out;
 436                }
 437
 438                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 439                el = &eb->h_list;
 440
 441                if (el->l_tree_depth) {
 442                        ocfs2_error(inode->i_sb,
 443                                    "Inode %lu has non zero tree depth in "
 444                                    "leaf block %llu\n", inode->i_ino,
 445                                    (unsigned long long)eb_bh->b_blocknr);
 446                        ret = -EROFS;
 447                        goto out;
 448                }
 449        }
 450
 451        i = ocfs2_search_extent_list(el, v_cluster);
 452        if (i == -1) {
 453                /*
 454                 * Holes can be larger than the maximum size of an
 455                 * extent, so we return their lengths in a seperate
 456                 * field.
 457                 */
 458                if (hole_len) {
 459                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
 460                                                         el, eb_bh,
 461                                                         v_cluster, &len);
 462                        if (ret) {
 463                                mlog_errno(ret);
 464                                goto out;
 465                        }
 466
 467                        *hole_len = len;
 468                }
 469                goto out_hole;
 470        }
 471
 472        rec = &el->l_recs[i];
 473
 474        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 475
 476        if (!rec->e_blkno) {
 477                ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
 478                            "record (%u, %u, 0)", inode->i_ino,
 479                            le32_to_cpu(rec->e_cpos),
 480                            ocfs2_rec_clusters(el, rec));
 481                ret = -EROFS;
 482                goto out;
 483        }
 484
 485        *ret_rec = *rec;
 486
 487        /*
 488         * Checking for last extent is potentially expensive - we
 489         * might have to look at the next leaf over to see if it's
 490         * empty.
 491         *
 492         * The first two checks are to see whether the caller even
 493         * cares for this information, and if the extent is at least
 494         * the last in it's list.
 495         *
 496         * If those hold true, then the extent is last if any of the
 497         * additional conditions hold true:
 498         *  - Extent list is in-inode
 499         *  - Extent list is right-most
 500         *  - Extent list is 2nd to rightmost, with empty right-most
 501         */
 502        if (is_last) {
 503                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
 504                        if (tree_height == 0)
 505                                *is_last = 1;
 506                        else if (eb->h_blkno == di->i_last_eb_blk)
 507                                *is_last = 1;
 508                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
 509                                ret = ocfs2_last_eb_is_empty(inode, di);
 510                                if (ret < 0) {
 511                                        mlog_errno(ret);
 512                                        goto out;
 513                                }
 514                                if (ret == 1)
 515                                        *is_last = 1;
 516                        }
 517                }
 518        }
 519
 520out_hole:
 521        ret = 0;
 522out:
 523        brelse(eb_bh);
 524        return ret;
 525}
 526
 527static void ocfs2_relative_extent_offsets(struct super_block *sb,
 528                                          u32 v_cluster,
 529                                          struct ocfs2_extent_rec *rec,
 530                                          u32 *p_cluster, u32 *num_clusters)
 531
 532{
 533        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 534
 535        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 536        *p_cluster = *p_cluster + coff;
 537
 538        if (num_clusters)
 539                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 540}
 541
 542int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 543                             u32 *p_cluster, u32 *num_clusters,
 544                             struct ocfs2_extent_list *el,
 545                             unsigned int *extent_flags)
 546{
 547        int ret = 0, i;
 548        struct buffer_head *eb_bh = NULL;
 549        struct ocfs2_extent_block *eb;
 550        struct ocfs2_extent_rec *rec;
 551        u32 coff;
 552
 553        if (el->l_tree_depth) {
 554                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 555                                      &eb_bh);
 556                if (ret) {
 557                        mlog_errno(ret);
 558                        goto out;
 559                }
 560
 561                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 562                el = &eb->h_list;
 563
 564                if (el->l_tree_depth) {
 565                        ocfs2_error(inode->i_sb,
 566                                    "Inode %lu has non zero tree depth in "
 567                                    "xattr leaf block %llu\n", inode->i_ino,
 568                                    (unsigned long long)eb_bh->b_blocknr);
 569                        ret = -EROFS;
 570                        goto out;
 571                }
 572        }
 573
 574        i = ocfs2_search_extent_list(el, v_cluster);
 575        if (i == -1) {
 576                ret = -EROFS;
 577                mlog_errno(ret);
 578                goto out;
 579        } else {
 580                rec = &el->l_recs[i];
 581                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 582
 583                if (!rec->e_blkno) {
 584                        ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
 585                                    "record (%u, %u, 0) in xattr", inode->i_ino,
 586                                    le32_to_cpu(rec->e_cpos),
 587                                    ocfs2_rec_clusters(el, rec));
 588                        ret = -EROFS;
 589                        goto out;
 590                }
 591                coff = v_cluster - le32_to_cpu(rec->e_cpos);
 592                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 593                                                    le64_to_cpu(rec->e_blkno));
 594                *p_cluster = *p_cluster + coff;
 595                if (num_clusters)
 596                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 597
 598                if (extent_flags)
 599                        *extent_flags = rec->e_flags;
 600        }
 601out:
 602        if (eb_bh)
 603                brelse(eb_bh);
 604        return ret;
 605}
 606
 607int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 608                       u32 *p_cluster, u32 *num_clusters,
 609                       unsigned int *extent_flags)
 610{
 611        int ret;
 612        unsigned int uninitialized_var(hole_len), flags = 0;
 613        struct buffer_head *di_bh = NULL;
 614        struct ocfs2_extent_rec rec;
 615
 616        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 617                ret = -ERANGE;
 618                mlog_errno(ret);
 619                goto out;
 620        }
 621
 622        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
 623                                      num_clusters, extent_flags);
 624        if (ret == 0)
 625                goto out;
 626
 627        ret = ocfs2_read_inode_block(inode, &di_bh);
 628        if (ret) {
 629                mlog_errno(ret);
 630                goto out;
 631        }
 632
 633        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
 634                                         &rec, NULL);
 635        if (ret) {
 636                mlog_errno(ret);
 637                goto out;
 638        }
 639
 640        if (rec.e_blkno == 0ULL) {
 641                /*
 642                 * A hole was found. Return some canned values that
 643                 * callers can key on. If asked for, num_clusters will
 644                 * be populated with the size of the hole.
 645                 */
 646                *p_cluster = 0;
 647                if (num_clusters) {
 648                        *num_clusters = hole_len;
 649                }
 650        } else {
 651                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
 652                                              p_cluster, num_clusters);
 653                flags = rec.e_flags;
 654
 655                ocfs2_extent_map_insert_rec(inode, &rec);
 656        }
 657
 658        if (extent_flags)
 659                *extent_flags = flags;
 660
 661out:
 662        brelse(di_bh);
 663        return ret;
 664}
 665
 666/*
 667 * This expects alloc_sem to be held. The allocation cannot change at
 668 * all while the map is in the process of being updated.
 669 */
 670int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 671                                u64 *ret_count, unsigned int *extent_flags)
 672{
 673        int ret;
 674        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 675        u32 cpos, num_clusters, p_cluster;
 676        u64 boff = 0;
 677
 678        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 679
 680        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 681                                 extent_flags);
 682        if (ret) {
 683                mlog_errno(ret);
 684                goto out;
 685        }
 686
 687        /*
 688         * p_cluster == 0 indicates a hole.
 689         */
 690        if (p_cluster) {
 691                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 692                boff += (v_blkno & (u64)(bpc - 1));
 693        }
 694
 695        *p_blkno = boff;
 696
 697        if (ret_count) {
 698                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 699                *ret_count -= v_blkno & (u64)(bpc - 1);
 700        }
 701
 702out:
 703        return ret;
 704}
 705
 706static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 707                               struct fiemap_extent_info *fieinfo,
 708                               u64 map_start)
 709{
 710        int ret;
 711        unsigned int id_count;
 712        struct ocfs2_dinode *di;
 713        u64 phys;
 714        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
 715        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 716
 717        di = (struct ocfs2_dinode *)di_bh->b_data;
 718        id_count = le16_to_cpu(di->id2.i_data.id_count);
 719
 720        if (map_start < id_count) {
 721                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
 722                phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
 723
 724                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
 725                                              flags);
 726                if (ret < 0)
 727                        return ret;
 728        }
 729
 730        return 0;
 731}
 732
 733#define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
 734
 735int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 736                 u64 map_start, u64 map_len)
 737{
 738        int ret, is_last;
 739        u32 mapping_end, cpos;
 740        unsigned int hole_size;
 741        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 742        u64 len_bytes, phys_bytes, virt_bytes;
 743        struct buffer_head *di_bh = NULL;
 744        struct ocfs2_extent_rec rec;
 745
 746        ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
 747        if (ret)
 748                return ret;
 749
 750        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 751        if (ret) {
 752                mlog_errno(ret);
 753                goto out;
 754        }
 755
 756        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 757
 758        /*
 759         * Handle inline-data separately.
 760         */
 761        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 762                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
 763                goto out_unlock;
 764        }
 765
 766        cpos = map_start >> osb->s_clustersize_bits;
 767        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 768                                               map_start + map_len);
 769        mapping_end -= cpos;
 770        is_last = 0;
 771        while (cpos < mapping_end && !is_last) {
 772                u32 fe_flags;
 773
 774                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 775                                                 &hole_size, &rec, &is_last);
 776                if (ret) {
 777                        mlog_errno(ret);
 778                        goto out;
 779                }
 780
 781                if (rec.e_blkno == 0ULL) {
 782                        cpos += hole_size;
 783                        continue;
 784                }
 785
 786                fe_flags = 0;
 787                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
 788                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
 789                if (is_last)
 790                        fe_flags |= FIEMAP_EXTENT_LAST;
 791                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
 792                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
 793                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
 794
 795                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
 796                                              len_bytes, fe_flags);
 797                if (ret)
 798                        break;
 799
 800                cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
 801        }
 802
 803        if (ret > 0)
 804                ret = 0;
 805
 806out_unlock:
 807        brelse(di_bh);
 808
 809        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 810
 811        ocfs2_inode_unlock(inode, 0);
 812out:
 813
 814        return ret;
 815}
 816
 817int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 818                           struct buffer_head *bhs[], int flags,
 819                           int (*validate)(struct super_block *sb,
 820                                           struct buffer_head *bh))
 821{
 822        int rc = 0;
 823        u64 p_block, p_count;
 824        int i, count, done = 0;
 825
 826        mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
 827                   "flags = %x, validate = %p)\n",
 828                   inode, (unsigned long long)v_block, nr, bhs, flags,
 829                   validate);
 830
 831        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 832            i_size_read(inode)) {
 833                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 834                goto out;
 835        }
 836
 837        while (done < nr) {
 838                down_read(&OCFS2_I(inode)->ip_alloc_sem);
 839                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 840                                                 &p_block, &p_count, NULL);
 841                up_read(&OCFS2_I(inode)->ip_alloc_sem);
 842                if (rc) {
 843                        mlog_errno(rc);
 844                        break;
 845                }
 846
 847                if (!p_block) {
 848                        rc = -EIO;
 849                        mlog(ML_ERROR,
 850                             "Inode #%llu contains a hole at offset %llu\n",
 851                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
 852                             (unsigned long long)(v_block + done) <<
 853                             inode->i_sb->s_blocksize_bits);
 854                        break;
 855                }
 856
 857                count = nr - done;
 858                if (p_count < count)
 859                        count = p_count;
 860
 861                /*
 862                 * If the caller passed us bhs, they should have come
 863                 * from a previous readahead call to this function.  Thus,
 864                 * they should have the right b_blocknr.
 865                 */
 866                for (i = 0; i < count; i++) {
 867                        if (!bhs[done + i])
 868                                continue;
 869                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
 870                }
 871
 872                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
 873                                       bhs + done, flags, validate);
 874                if (rc) {
 875                        mlog_errno(rc);
 876                        break;
 877                }
 878                done += count;
 879        }
 880
 881out:
 882        mlog_exit(rc);
 883        return rc;
 884}
 885
 886
 887