linux/fs/ocfs2/extent_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * extent_map.c
   5 *
   6 * Block/Cluster mapping functions
   7 *
   8 * Copyright (C) 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License, version 2,  as published by the Free Software Foundation.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public
  20 * License along with this program; if not, write to the
  21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  22 * Boston, MA 021110-1307, USA.
  23 */
  24
  25#include <linux/fs.h>
  26#include <linux/init.h>
  27#include <linux/slab.h>
  28#include <linux/types.h>
  29#include <linux/fiemap.h>
  30
  31#include <cluster/masklog.h>
  32
  33#include "ocfs2.h"
  34
  35#include "alloc.h"
  36#include "dlmglue.h"
  37#include "extent_map.h"
  38#include "inode.h"
  39#include "super.h"
  40#include "symlink.h"
  41#include "ocfs2_trace.h"
  42
  43#include "buffer_head_io.h"
  44
  45/*
  46 * The extent caching implementation is intentionally trivial.
  47 *
  48 * We only cache a small number of extents stored directly on the
  49 * inode, so linear order operations are acceptable. If we ever want
  50 * to increase the size of the extent map, then these algorithms must
  51 * get smarter.
  52 */
  53
  54void ocfs2_extent_map_init(struct inode *inode)
  55{
  56        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  57
  58        oi->ip_extent_map.em_num_items = 0;
  59        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  60}
  61
  62static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  63                                      unsigned int cpos,
  64                                      struct ocfs2_extent_map_item **ret_emi)
  65{
  66        unsigned int range;
  67        struct ocfs2_extent_map_item *emi;
  68
  69        *ret_emi = NULL;
  70
  71        list_for_each_entry(emi, &em->em_list, ei_list) {
  72                range = emi->ei_cpos + emi->ei_clusters;
  73
  74                if (cpos >= emi->ei_cpos && cpos < range) {
  75                        list_move(&emi->ei_list, &em->em_list);
  76
  77                        *ret_emi = emi;
  78                        break;
  79                }
  80        }
  81}
  82
  83static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  84                                   unsigned int *phys, unsigned int *len,
  85                                   unsigned int *flags)
  86{
  87        unsigned int coff;
  88        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  89        struct ocfs2_extent_map_item *emi;
  90
  91        spin_lock(&oi->ip_lock);
  92
  93        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  94        if (emi) {
  95                coff = cpos - emi->ei_cpos;
  96                *phys = emi->ei_phys + coff;
  97                if (len)
  98                        *len = emi->ei_clusters - coff;
  99                if (flags)
 100                        *flags = emi->ei_flags;
 101        }
 102
 103        spin_unlock(&oi->ip_lock);
 104
 105        if (emi == NULL)
 106                return -ENOENT;
 107
 108        return 0;
 109}
 110
 111/*
 112 * Forget about all clusters equal to or greater than cpos.
 113 */
 114void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 115{
 116        struct ocfs2_extent_map_item *emi, *n;
 117        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 118        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 119        LIST_HEAD(tmp_list);
 120        unsigned int range;
 121
 122        spin_lock(&oi->ip_lock);
 123        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 124                if (emi->ei_cpos >= cpos) {
 125                        /* Full truncate of this record. */
 126                        list_move(&emi->ei_list, &tmp_list);
 127                        BUG_ON(em->em_num_items == 0);
 128                        em->em_num_items--;
 129                        continue;
 130                }
 131
 132                range = emi->ei_cpos + emi->ei_clusters;
 133                if (range > cpos) {
 134                        /* Partial truncate */
 135                        emi->ei_clusters = cpos - emi->ei_cpos;
 136                }
 137        }
 138        spin_unlock(&oi->ip_lock);
 139
 140        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 141                list_del(&emi->ei_list);
 142                kfree(emi);
 143        }
 144}
 145
 146/*
 147 * Is any part of emi2 contained within emi1
 148 */
 149static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 150                                 struct ocfs2_extent_map_item *emi2)
 151{
 152        unsigned int range1, range2;
 153
 154        /*
 155         * Check if logical start of emi2 is inside emi1
 156         */
 157        range1 = emi1->ei_cpos + emi1->ei_clusters;
 158        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 159                return 1;
 160
 161        /*
 162         * Check if logical end of emi2 is inside emi1
 163         */
 164        range2 = emi2->ei_cpos + emi2->ei_clusters;
 165        if (range2 > emi1->ei_cpos && range2 <= range1)
 166                return 1;
 167
 168        return 0;
 169}
 170
 171static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 172                                  struct ocfs2_extent_map_item *src)
 173{
 174        dest->ei_cpos = src->ei_cpos;
 175        dest->ei_phys = src->ei_phys;
 176        dest->ei_clusters = src->ei_clusters;
 177        dest->ei_flags = src->ei_flags;
 178}
 179
 180/*
 181 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 182 * otherwise.
 183 */
 184static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 185                                         struct ocfs2_extent_map_item *ins)
 186{
 187        /*
 188         * Handle contiguousness
 189         */
 190        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 191            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 192            ins->ei_flags == emi->ei_flags) {
 193                emi->ei_clusters += ins->ei_clusters;
 194                return 1;
 195        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 196                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 197                   ins->ei_flags == emi->ei_flags) {
 198                emi->ei_phys = ins->ei_phys;
 199                emi->ei_cpos = ins->ei_cpos;
 200                emi->ei_clusters += ins->ei_clusters;
 201                return 1;
 202        }
 203
 204        /*
 205         * Overlapping extents - this shouldn't happen unless we've
 206         * split an extent to change it's flags. That is exceedingly
 207         * rare, so there's no sense in trying to optimize it yet.
 208         */
 209        if (ocfs2_ei_is_contained(emi, ins) ||
 210            ocfs2_ei_is_contained(ins, emi)) {
 211                ocfs2_copy_emi_fields(emi, ins);
 212                return 1;
 213        }
 214
 215        /* No merge was possible. */
 216        return 0;
 217}
 218
 219/*
 220 * In order to reduce complexity on the caller, this insert function
 221 * is intentionally liberal in what it will accept.
 222 *
 223 * The only rule is that the truncate call *must* be used whenever
 224 * records have been deleted. This avoids inserting overlapping
 225 * records with different physical mappings.
 226 */
 227void ocfs2_extent_map_insert_rec(struct inode *inode,
 228                                 struct ocfs2_extent_rec *rec)
 229{
 230        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 231        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 232        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
 233        struct ocfs2_extent_map_item ins;
 234
 235        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
 236        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
 237                                               le64_to_cpu(rec->e_blkno));
 238        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
 239        ins.ei_flags = rec->e_flags;
 240
 241search:
 242        spin_lock(&oi->ip_lock);
 243
 244        list_for_each_entry(emi, &em->em_list, ei_list) {
 245                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
 246                        list_move(&emi->ei_list, &em->em_list);
 247                        spin_unlock(&oi->ip_lock);
 248                        goto out;
 249                }
 250        }
 251
 252        /*
 253         * No item could be merged.
 254         *
 255         * Either allocate and add a new item, or overwrite the last recently
 256         * inserted.
 257         */
 258
 259        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
 260                if (new_emi == NULL) {
 261                        spin_unlock(&oi->ip_lock);
 262
 263                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
 264                        if (new_emi == NULL)
 265                                goto out;
 266
 267                        goto search;
 268                }
 269
 270                ocfs2_copy_emi_fields(new_emi, &ins);
 271                list_add(&new_emi->ei_list, &em->em_list);
 272                em->em_num_items++;
 273                new_emi = NULL;
 274        } else {
 275                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
 276                emi = list_entry(em->em_list.prev,
 277                                 struct ocfs2_extent_map_item, ei_list);
 278                list_move(&emi->ei_list, &em->em_list);
 279                ocfs2_copy_emi_fields(emi, &ins);
 280        }
 281
 282        spin_unlock(&oi->ip_lock);
 283
 284out:
 285        kfree(new_emi);
 286}
 287
 288static int ocfs2_last_eb_is_empty(struct inode *inode,
 289                                  struct ocfs2_dinode *di)
 290{
 291        int ret, next_free;
 292        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
 293        struct buffer_head *eb_bh = NULL;
 294        struct ocfs2_extent_block *eb;
 295        struct ocfs2_extent_list *el;
 296
 297        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 298        if (ret) {
 299                mlog_errno(ret);
 300                goto out;
 301        }
 302
 303        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 304        el = &eb->h_list;
 305
 306        if (el->l_tree_depth) {
 307                ocfs2_error(inode->i_sb,
 308                            "Inode %lu has non zero tree depth in "
 309                            "leaf block %llu\n", inode->i_ino,
 310                            (unsigned long long)eb_bh->b_blocknr);
 311                ret = -EROFS;
 312                goto out;
 313        }
 314
 315        next_free = le16_to_cpu(el->l_next_free_rec);
 316
 317        if (next_free == 0 ||
 318            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
 319                ret = 1;
 320
 321out:
 322        brelse(eb_bh);
 323        return ret;
 324}
 325
 326/*
 327 * Return the 1st index within el which contains an extent start
 328 * larger than v_cluster.
 329 */
 330static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 331                                       u32 v_cluster)
 332{
 333        int i;
 334        struct ocfs2_extent_rec *rec;
 335
 336        for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 337                rec = &el->l_recs[i];
 338
 339                if (v_cluster < le32_to_cpu(rec->e_cpos))
 340                        break;
 341        }
 342
 343        return i;
 344}
 345
 346/*
 347 * Figure out the size of a hole which starts at v_cluster within the given
 348 * extent list.
 349 *
 350 * If there is no more allocation past v_cluster, we return the maximum
 351 * cluster size minus v_cluster.
 352 *
 353 * If we have in-inode extents, then el points to the dinode list and
 354 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 355 * containing el.
 356 */
 357int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
 358                               struct ocfs2_extent_list *el,
 359                               struct buffer_head *eb_bh,
 360                               u32 v_cluster,
 361                               u32 *num_clusters)
 362{
 363        int ret, i;
 364        struct buffer_head *next_eb_bh = NULL;
 365        struct ocfs2_extent_block *eb, *next_eb;
 366
 367        i = ocfs2_search_for_hole_index(el, v_cluster);
 368
 369        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
 370                eb = (struct ocfs2_extent_block *)eb_bh->b_data;
 371
 372                /*
 373                 * Check the next leaf for any extents.
 374                 */
 375
 376                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 377                        goto no_more_extents;
 378
 379                ret = ocfs2_read_extent_block(ci,
 380                                              le64_to_cpu(eb->h_next_leaf_blk),
 381                                              &next_eb_bh);
 382                if (ret) {
 383                        mlog_errno(ret);
 384                        goto out;
 385                }
 386
 387                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 388                el = &next_eb->h_list;
 389                i = ocfs2_search_for_hole_index(el, v_cluster);
 390        }
 391
 392no_more_extents:
 393        if (i == le16_to_cpu(el->l_next_free_rec)) {
 394                /*
 395                 * We're at the end of our existing allocation. Just
 396                 * return the maximum number of clusters we could
 397                 * possibly allocate.
 398                 */
 399                *num_clusters = UINT_MAX - v_cluster;
 400        } else {
 401                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
 402        }
 403
 404        ret = 0;
 405out:
 406        brelse(next_eb_bh);
 407        return ret;
 408}
 409
 410static int ocfs2_get_clusters_nocache(struct inode *inode,
 411                                      struct buffer_head *di_bh,
 412                                      u32 v_cluster, unsigned int *hole_len,
 413                                      struct ocfs2_extent_rec *ret_rec,
 414                                      unsigned int *is_last)
 415{
 416        int i, ret, tree_height, len;
 417        struct ocfs2_dinode *di;
 418        struct ocfs2_extent_block *uninitialized_var(eb);
 419        struct ocfs2_extent_list *el;
 420        struct ocfs2_extent_rec *rec;
 421        struct buffer_head *eb_bh = NULL;
 422
 423        memset(ret_rec, 0, sizeof(*ret_rec));
 424        if (is_last)
 425                *is_last = 0;
 426
 427        di = (struct ocfs2_dinode *) di_bh->b_data;
 428        el = &di->id2.i_list;
 429        tree_height = le16_to_cpu(el->l_tree_depth);
 430
 431        if (tree_height > 0) {
 432                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 433                                      &eb_bh);
 434                if (ret) {
 435                        mlog_errno(ret);
 436                        goto out;
 437                }
 438
 439                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 440                el = &eb->h_list;
 441
 442                if (el->l_tree_depth) {
 443                        ocfs2_error(inode->i_sb,
 444                                    "Inode %lu has non zero tree depth in "
 445                                    "leaf block %llu\n", inode->i_ino,
 446                                    (unsigned long long)eb_bh->b_blocknr);
 447                        ret = -EROFS;
 448                        goto out;
 449                }
 450        }
 451
 452        i = ocfs2_search_extent_list(el, v_cluster);
 453        if (i == -1) {
 454                /*
 455                 * Holes can be larger than the maximum size of an
 456                 * extent, so we return their lengths in a separate
 457                 * field.
 458                 */
 459                if (hole_len) {
 460                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
 461                                                         el, eb_bh,
 462                                                         v_cluster, &len);
 463                        if (ret) {
 464                                mlog_errno(ret);
 465                                goto out;
 466                        }
 467
 468                        *hole_len = len;
 469                }
 470                goto out_hole;
 471        }
 472
 473        rec = &el->l_recs[i];
 474
 475        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 476
 477        if (!rec->e_blkno) {
 478                ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
 479                            "record (%u, %u, 0)", inode->i_ino,
 480                            le32_to_cpu(rec->e_cpos),
 481                            ocfs2_rec_clusters(el, rec));
 482                ret = -EROFS;
 483                goto out;
 484        }
 485
 486        *ret_rec = *rec;
 487
 488        /*
 489         * Checking for last extent is potentially expensive - we
 490         * might have to look at the next leaf over to see if it's
 491         * empty.
 492         *
 493         * The first two checks are to see whether the caller even
 494         * cares for this information, and if the extent is at least
 495         * the last in it's list.
 496         *
 497         * If those hold true, then the extent is last if any of the
 498         * additional conditions hold true:
 499         *  - Extent list is in-inode
 500         *  - Extent list is right-most
 501         *  - Extent list is 2nd to rightmost, with empty right-most
 502         */
 503        if (is_last) {
 504                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
 505                        if (tree_height == 0)
 506                                *is_last = 1;
 507                        else if (eb->h_blkno == di->i_last_eb_blk)
 508                                *is_last = 1;
 509                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
 510                                ret = ocfs2_last_eb_is_empty(inode, di);
 511                                if (ret < 0) {
 512                                        mlog_errno(ret);
 513                                        goto out;
 514                                }
 515                                if (ret == 1)
 516                                        *is_last = 1;
 517                        }
 518                }
 519        }
 520
 521out_hole:
 522        ret = 0;
 523out:
 524        brelse(eb_bh);
 525        return ret;
 526}
 527
 528static void ocfs2_relative_extent_offsets(struct super_block *sb,
 529                                          u32 v_cluster,
 530                                          struct ocfs2_extent_rec *rec,
 531                                          u32 *p_cluster, u32 *num_clusters)
 532
 533{
 534        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 535
 536        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 537        *p_cluster = *p_cluster + coff;
 538
 539        if (num_clusters)
 540                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 541}
 542
 543int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 544                             u32 *p_cluster, u32 *num_clusters,
 545                             struct ocfs2_extent_list *el,
 546                             unsigned int *extent_flags)
 547{
 548        int ret = 0, i;
 549        struct buffer_head *eb_bh = NULL;
 550        struct ocfs2_extent_block *eb;
 551        struct ocfs2_extent_rec *rec;
 552        u32 coff;
 553
 554        if (el->l_tree_depth) {
 555                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 556                                      &eb_bh);
 557                if (ret) {
 558                        mlog_errno(ret);
 559                        goto out;
 560                }
 561
 562                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 563                el = &eb->h_list;
 564
 565                if (el->l_tree_depth) {
 566                        ocfs2_error(inode->i_sb,
 567                                    "Inode %lu has non zero tree depth in "
 568                                    "xattr leaf block %llu\n", inode->i_ino,
 569                                    (unsigned long long)eb_bh->b_blocknr);
 570                        ret = -EROFS;
 571                        goto out;
 572                }
 573        }
 574
 575        i = ocfs2_search_extent_list(el, v_cluster);
 576        if (i == -1) {
 577                ret = -EROFS;
 578                mlog_errno(ret);
 579                goto out;
 580        } else {
 581                rec = &el->l_recs[i];
 582                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 583
 584                if (!rec->e_blkno) {
 585                        ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
 586                                    "record (%u, %u, 0) in xattr", inode->i_ino,
 587                                    le32_to_cpu(rec->e_cpos),
 588                                    ocfs2_rec_clusters(el, rec));
 589                        ret = -EROFS;
 590                        goto out;
 591                }
 592                coff = v_cluster - le32_to_cpu(rec->e_cpos);
 593                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 594                                                    le64_to_cpu(rec->e_blkno));
 595                *p_cluster = *p_cluster + coff;
 596                if (num_clusters)
 597                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 598
 599                if (extent_flags)
 600                        *extent_flags = rec->e_flags;
 601        }
 602out:
 603        if (eb_bh)
 604                brelse(eb_bh);
 605        return ret;
 606}
 607
 608int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 609                       u32 *p_cluster, u32 *num_clusters,
 610                       unsigned int *extent_flags)
 611{
 612        int ret;
 613        unsigned int uninitialized_var(hole_len), flags = 0;
 614        struct buffer_head *di_bh = NULL;
 615        struct ocfs2_extent_rec rec;
 616
 617        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 618                ret = -ERANGE;
 619                mlog_errno(ret);
 620                goto out;
 621        }
 622
 623        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
 624                                      num_clusters, extent_flags);
 625        if (ret == 0)
 626                goto out;
 627
 628        ret = ocfs2_read_inode_block(inode, &di_bh);
 629        if (ret) {
 630                mlog_errno(ret);
 631                goto out;
 632        }
 633
 634        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
 635                                         &rec, NULL);
 636        if (ret) {
 637                mlog_errno(ret);
 638                goto out;
 639        }
 640
 641        if (rec.e_blkno == 0ULL) {
 642                /*
 643                 * A hole was found. Return some canned values that
 644                 * callers can key on. If asked for, num_clusters will
 645                 * be populated with the size of the hole.
 646                 */
 647                *p_cluster = 0;
 648                if (num_clusters) {
 649                        *num_clusters = hole_len;
 650                }
 651        } else {
 652                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
 653                                              p_cluster, num_clusters);
 654                flags = rec.e_flags;
 655
 656                ocfs2_extent_map_insert_rec(inode, &rec);
 657        }
 658
 659        if (extent_flags)
 660                *extent_flags = flags;
 661
 662out:
 663        brelse(di_bh);
 664        return ret;
 665}
 666
 667/*
 668 * This expects alloc_sem to be held. The allocation cannot change at
 669 * all while the map is in the process of being updated.
 670 */
 671int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 672                                u64 *ret_count, unsigned int *extent_flags)
 673{
 674        int ret;
 675        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 676        u32 cpos, num_clusters, p_cluster;
 677        u64 boff = 0;
 678
 679        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 680
 681        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 682                                 extent_flags);
 683        if (ret) {
 684                mlog_errno(ret);
 685                goto out;
 686        }
 687
 688        /*
 689         * p_cluster == 0 indicates a hole.
 690         */
 691        if (p_cluster) {
 692                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 693                boff += (v_blkno & (u64)(bpc - 1));
 694        }
 695
 696        *p_blkno = boff;
 697
 698        if (ret_count) {
 699                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 700                *ret_count -= v_blkno & (u64)(bpc - 1);
 701        }
 702
 703out:
 704        return ret;
 705}
 706
 707/*
 708 * The ocfs2_fiemap_inline() may be a little bit misleading, since
 709 * it not only handles the fiemap for inlined files, but also deals
 710 * with the fast symlink, cause they have no difference for extent
 711 * mapping per se.
 712 */
 713static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 714                               struct fiemap_extent_info *fieinfo,
 715                               u64 map_start)
 716{
 717        int ret;
 718        unsigned int id_count;
 719        struct ocfs2_dinode *di;
 720        u64 phys;
 721        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
 722        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 723
 724        di = (struct ocfs2_dinode *)di_bh->b_data;
 725        if (ocfs2_inode_is_fast_symlink(inode))
 726                id_count = ocfs2_fast_symlink_chars(inode->i_sb);
 727        else
 728                id_count = le16_to_cpu(di->id2.i_data.id_count);
 729
 730        if (map_start < id_count) {
 731                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
 732                if (ocfs2_inode_is_fast_symlink(inode))
 733                        phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
 734                else
 735                        phys += offsetof(struct ocfs2_dinode,
 736                                         id2.i_data.id_data);
 737
 738                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
 739                                              flags);
 740                if (ret < 0)
 741                        return ret;
 742        }
 743
 744        return 0;
 745}
 746
 747#define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
 748
 749int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 750                 u64 map_start, u64 map_len)
 751{
 752        int ret, is_last;
 753        u32 mapping_end, cpos;
 754        unsigned int hole_size;
 755        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 756        u64 len_bytes, phys_bytes, virt_bytes;
 757        struct buffer_head *di_bh = NULL;
 758        struct ocfs2_extent_rec rec;
 759
 760        ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
 761        if (ret)
 762                return ret;
 763
 764        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 765        if (ret) {
 766                mlog_errno(ret);
 767                goto out;
 768        }
 769
 770        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 771
 772        /*
 773         * Handle inline-data and fast symlink separately.
 774         */
 775        if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
 776            ocfs2_inode_is_fast_symlink(inode)) {
 777                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
 778                goto out_unlock;
 779        }
 780
 781        cpos = map_start >> osb->s_clustersize_bits;
 782        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 783                                               map_start + map_len);
 784        is_last = 0;
 785        while (cpos < mapping_end && !is_last) {
 786                u32 fe_flags;
 787
 788                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 789                                                 &hole_size, &rec, &is_last);
 790                if (ret) {
 791                        mlog_errno(ret);
 792                        goto out_unlock;
 793                }
 794
 795                if (rec.e_blkno == 0ULL) {
 796                        cpos += hole_size;
 797                        continue;
 798                }
 799
 800                fe_flags = 0;
 801                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
 802                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
 803                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 804                        fe_flags |= FIEMAP_EXTENT_SHARED;
 805                if (is_last)
 806                        fe_flags |= FIEMAP_EXTENT_LAST;
 807                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
 808                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
 809                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
 810
 811                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
 812                                              len_bytes, fe_flags);
 813                if (ret)
 814                        break;
 815
 816                cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
 817        }
 818
 819        if (ret > 0)
 820                ret = 0;
 821
 822out_unlock:
 823        brelse(di_bh);
 824
 825        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 826
 827        ocfs2_inode_unlock(inode, 0);
 828out:
 829
 830        return ret;
 831}
 832
 833int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 834{
 835        struct inode *inode = file->f_mapping->host;
 836        int ret;
 837        unsigned int is_last = 0, is_data = 0;
 838        u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
 839        u32 cpos, cend, clen, hole_size;
 840        u64 extoff, extlen;
 841        struct buffer_head *di_bh = NULL;
 842        struct ocfs2_extent_rec rec;
 843
 844        BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 845
 846        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 847        if (ret) {
 848                mlog_errno(ret);
 849                goto out;
 850        }
 851
 852        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 853
 854        if (*offset >= i_size_read(inode)) {
 855                ret = -ENXIO;
 856                goto out_unlock;
 857        }
 858
 859        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 860                if (whence == SEEK_HOLE)
 861                        *offset = i_size_read(inode);
 862                goto out_unlock;
 863        }
 864
 865        clen = 0;
 866        cpos = *offset >> cs_bits;
 867        cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 868
 869        while (cpos < cend && !is_last) {
 870                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
 871                                                 &rec, &is_last);
 872                if (ret) {
 873                        mlog_errno(ret);
 874                        goto out_unlock;
 875                }
 876
 877                extoff = cpos;
 878                extoff <<= cs_bits;
 879
 880                if (rec.e_blkno == 0ULL) {
 881                        clen = hole_size;
 882                        is_data = 0;
 883                } else {
 884                        clen = le16_to_cpu(rec.e_leaf_clusters) -
 885                                (cpos - le32_to_cpu(rec.e_cpos));
 886                        is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 887                }
 888
 889                if ((!is_data && whence == SEEK_HOLE) ||
 890                    (is_data && whence == SEEK_DATA)) {
 891                        if (extoff > *offset)
 892                                *offset = extoff;
 893                        goto out_unlock;
 894                }
 895
 896                if (!is_last)
 897                        cpos += clen;
 898        }
 899
 900        if (whence == SEEK_HOLE) {
 901                extoff = cpos;
 902                extoff <<= cs_bits;
 903                extlen = clen;
 904                extlen <<=  cs_bits;
 905
 906                if ((extoff + extlen) > i_size_read(inode))
 907                        extlen = i_size_read(inode) - extoff;
 908                extoff += extlen;
 909                if (extoff > *offset)
 910                        *offset = extoff;
 911                goto out_unlock;
 912        }
 913
 914        ret = -ENXIO;
 915
 916out_unlock:
 917
 918        brelse(di_bh);
 919
 920        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 921
 922        ocfs2_inode_unlock(inode, 0);
 923out:
 924        return ret;
 925}
 926
 927int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 928                           struct buffer_head *bhs[], int flags,
 929                           int (*validate)(struct super_block *sb,
 930                                           struct buffer_head *bh))
 931{
 932        int rc = 0;
 933        u64 p_block, p_count;
 934        int i, count, done = 0;
 935
 936        trace_ocfs2_read_virt_blocks(
 937             inode, (unsigned long long)v_block, nr, bhs, flags,
 938             validate);
 939
 940        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 941            i_size_read(inode)) {
 942                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 943                goto out;
 944        }
 945
 946        while (done < nr) {
 947                down_read(&OCFS2_I(inode)->ip_alloc_sem);
 948                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 949                                                 &p_block, &p_count, NULL);
 950                up_read(&OCFS2_I(inode)->ip_alloc_sem);
 951                if (rc) {
 952                        mlog_errno(rc);
 953                        break;
 954                }
 955
 956                if (!p_block) {
 957                        rc = -EIO;
 958                        mlog(ML_ERROR,
 959                             "Inode #%llu contains a hole at offset %llu\n",
 960                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
 961                             (unsigned long long)(v_block + done) <<
 962                             inode->i_sb->s_blocksize_bits);
 963                        break;
 964                }
 965
 966                count = nr - done;
 967                if (p_count < count)
 968                        count = p_count;
 969
 970                /*
 971                 * If the caller passed us bhs, they should have come
 972                 * from a previous readahead call to this function.  Thus,
 973                 * they should have the right b_blocknr.
 974                 */
 975                for (i = 0; i < count; i++) {
 976                        if (!bhs[done + i])
 977                                continue;
 978                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
 979                }
 980
 981                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
 982                                       bhs + done, flags, validate);
 983                if (rc) {
 984                        mlog_errno(rc);
 985                        break;
 986                }
 987                done += count;
 988        }
 989
 990out:
 991        return rc;
 992}
 993
 994
 995