/* linux/fs/ocfs2/extent_map.c */
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * extent_map.c
   5 *
   6 * Block/Cluster mapping functions
   7 *
   8 * Copyright (C) 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License, version 2,  as published by the Free Software Foundation.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public
  20 * License along with this program; if not, write to the
  21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  22 * Boston, MA 021110-1307, USA.
  23 */
  24
  25#include <linux/fs.h>
  26#include <linux/init.h>
  27#include <linux/slab.h>
  28#include <linux/types.h>
  29#include <linux/fiemap.h>
  30
  31#include <cluster/masklog.h>
  32
  33#include "ocfs2.h"
  34
  35#include "alloc.h"
  36#include "dlmglue.h"
  37#include "extent_map.h"
  38#include "inode.h"
  39#include "super.h"
  40#include "symlink.h"
  41#include "aops.h"
  42#include "ocfs2_trace.h"
  43
  44#include "buffer_head_io.h"
  45
  46/*
  47 * The extent caching implementation is intentionally trivial.
  48 *
  49 * We only cache a small number of extents stored directly on the
  50 * inode, so linear order operations are acceptable. If we ever want
  51 * to increase the size of the extent map, then these algorithms must
  52 * get smarter.
  53 */
  54
  55void ocfs2_extent_map_init(struct inode *inode)
  56{
  57        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  58
  59        oi->ip_extent_map.em_num_items = 0;
  60        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  61}
  62
  63static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  64                                      unsigned int cpos,
  65                                      struct ocfs2_extent_map_item **ret_emi)
  66{
  67        unsigned int range;
  68        struct ocfs2_extent_map_item *emi;
  69
  70        *ret_emi = NULL;
  71
  72        list_for_each_entry(emi, &em->em_list, ei_list) {
  73                range = emi->ei_cpos + emi->ei_clusters;
  74
  75                if (cpos >= emi->ei_cpos && cpos < range) {
  76                        list_move(&emi->ei_list, &em->em_list);
  77
  78                        *ret_emi = emi;
  79                        break;
  80                }
  81        }
  82}
  83
  84static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  85                                   unsigned int *phys, unsigned int *len,
  86                                   unsigned int *flags)
  87{
  88        unsigned int coff;
  89        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  90        struct ocfs2_extent_map_item *emi;
  91
  92        spin_lock(&oi->ip_lock);
  93
  94        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  95        if (emi) {
  96                coff = cpos - emi->ei_cpos;
  97                *phys = emi->ei_phys + coff;
  98                if (len)
  99                        *len = emi->ei_clusters - coff;
 100                if (flags)
 101                        *flags = emi->ei_flags;
 102        }
 103
 104        spin_unlock(&oi->ip_lock);
 105
 106        if (emi == NULL)
 107                return -ENOENT;
 108
 109        return 0;
 110}
 111
/*
 * Forget about all clusters equal to or greater than cpos.
 *
 * Items starting at or past cpos are removed outright; an item that
 * straddles cpos is shortened to end at cpos.  Removed items are
 * collected on a private list under ip_lock and freed only after the
 * lock is dropped, so kfree() never runs with the spinlock held.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

	/* Free removed items now that ip_lock has been released. */
	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}
 146
 147/*
 148 * Is any part of emi2 contained within emi1
 149 */
 150static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 151                                 struct ocfs2_extent_map_item *emi2)
 152{
 153        unsigned int range1, range2;
 154
 155        /*
 156         * Check if logical start of emi2 is inside emi1
 157         */
 158        range1 = emi1->ei_cpos + emi1->ei_clusters;
 159        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 160                return 1;
 161
 162        /*
 163         * Check if logical end of emi2 is inside emi1
 164         */
 165        range2 = emi2->ei_cpos + emi2->ei_clusters;
 166        if (range2 > emi1->ei_cpos && range2 <= range1)
 167                return 1;
 168
 169        return 0;
 170}
 171
 172static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 173                                  struct ocfs2_extent_map_item *src)
 174{
 175        dest->ei_cpos = src->ei_cpos;
 176        dest->ei_phys = src->ei_phys;
 177        dest->ei_clusters = src->ei_clusters;
 178        dest->ei_flags = src->ei_flags;
 179}
 180
 181/*
 182 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 183 * otherwise.
 184 */
 185static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 186                                         struct ocfs2_extent_map_item *ins)
 187{
 188        /*
 189         * Handle contiguousness
 190         */
 191        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 192            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 193            ins->ei_flags == emi->ei_flags) {
 194                emi->ei_clusters += ins->ei_clusters;
 195                return 1;
 196        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 197                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 198                   ins->ei_flags == emi->ei_flags) {
 199                emi->ei_phys = ins->ei_phys;
 200                emi->ei_cpos = ins->ei_cpos;
 201                emi->ei_clusters += ins->ei_clusters;
 202                return 1;
 203        }
 204
 205        /*
 206         * Overlapping extents - this shouldn't happen unless we've
 207         * split an extent to change it's flags. That is exceedingly
 208         * rare, so there's no sense in trying to optimize it yet.
 209         */
 210        if (ocfs2_ei_is_contained(emi, ins) ||
 211            ocfs2_ei_is_contained(ins, emi)) {
 212                ocfs2_copy_emi_fields(emi, ins);
 213                return 1;
 214        }
 215
 216        /* No merge was possible. */
 217        return 0;
 218}
 219
/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	/* Convert the on-disk record to CPU-endian extent map fields. */
	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			/* Merged items move to the front (LRU order). */
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the last recently
	 * inserted.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			/*
			 * Can't allocate while holding the spinlock:
			 * drop it, allocate, then retry the merge pass,
			 * since the list may have changed meanwhile.
			 */
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		/* Map is full - recycle the least recently used item. */
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	/* Releases the spare allocation if a retry merged instead. */
	kfree(new_emi);
}
 288
/*
 * Check whether the right-most extent block of the inode (recorded in
 * di->i_last_eb_blk) contains no real extents.
 *
 * Returns 1 if that leaf is empty (zero records, or a single empty
 * record), 0 if it holds extents, or a negative errno on read or
 * validation failure.
 */
static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	/* The last extent block must be a leaf (depth zero). */
	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	/* ret is 0 here (successful read) unless the leaf is empty. */
	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}
 326
 327/*
 328 * Return the 1st index within el which contains an extent start
 329 * larger than v_cluster.
 330 */
 331static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 332                                       u32 v_cluster)
 333{
 334        int i;
 335        struct ocfs2_extent_rec *rec;
 336
 337        for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 338                rec = &el->l_recs[i];
 339
 340                if (v_cluster < le32_to_cpu(rec->e_cpos))
 341                        break;
 342        }
 343
 344        return i;
 345}
 346
/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * cluster size minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 *
 * Returns 0 with *num_clusters set, or a negative errno if reading
 * the next leaf block fails.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	/* Ran off the end of a leaf's records - look at the next leaf. */
	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Repeat the search in the next leaf's record list. */
		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		/* The hole ends where the next allocated extent begins. */
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}
 410
/*
 * Look up the extent record covering logical cluster v_cluster by
 * reading the extent tree from disk (di_bh), bypassing the in-memory
 * extent map cache.
 *
 * On success *ret_rec holds the matching record; it is left zeroed
 * (e_blkno == 0) when v_cluster falls in a hole, in which case
 * *hole_len (if non-NULL) receives the hole length in clusters.
 * *is_last (if non-NULL) is set to 1 when the record is the file's
 * final allocated extent.  Returns 0 or negative errno.
 */
static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *uninitialized_var(eb);
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		/* Descend the btree to the leaf covering v_cluster. */
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		/* ocfs2_find_leaf() must hand back a depth-0 block. */
		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			/*
			 * NOTE(review): len is an int but
			 * ocfs2_figure_hole_clusters() takes a u32 * -
			 * relies on matching representation; confirm.
			 */
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	/* A zero block number in an allocated record means corruption. */
	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in it's list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}
 529
 530static void ocfs2_relative_extent_offsets(struct super_block *sb,
 531                                          u32 v_cluster,
 532                                          struct ocfs2_extent_rec *rec,
 533                                          u32 *p_cluster, u32 *num_clusters)
 534
 535{
 536        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 537
 538        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 539        *p_cluster = *p_cluster + coff;
 540
 541        if (num_clusters)
 542                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 543}
 544
 545int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 546                             u32 *p_cluster, u32 *num_clusters,
 547                             struct ocfs2_extent_list *el,
 548                             unsigned int *extent_flags)
 549{
 550        int ret = 0, i;
 551        struct buffer_head *eb_bh = NULL;
 552        struct ocfs2_extent_block *eb;
 553        struct ocfs2_extent_rec *rec;
 554        u32 coff;
 555
 556        if (el->l_tree_depth) {
 557                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 558                                      &eb_bh);
 559                if (ret) {
 560                        mlog_errno(ret);
 561                        goto out;
 562                }
 563
 564                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 565                el = &eb->h_list;
 566
 567                if (el->l_tree_depth) {
 568                        ocfs2_error(inode->i_sb,
 569                                    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
 570                                    inode->i_ino,
 571                                    (unsigned long long)eb_bh->b_blocknr);
 572                        ret = -EROFS;
 573                        goto out;
 574                }
 575        }
 576
 577        i = ocfs2_search_extent_list(el, v_cluster);
 578        if (i == -1) {
 579                ret = -EROFS;
 580                mlog_errno(ret);
 581                goto out;
 582        } else {
 583                rec = &el->l_recs[i];
 584                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 585
 586                if (!rec->e_blkno) {
 587                        ocfs2_error(inode->i_sb,
 588                                    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
 589                                    inode->i_ino,
 590                                    le32_to_cpu(rec->e_cpos),
 591                                    ocfs2_rec_clusters(el, rec));
 592                        ret = -EROFS;
 593                        goto out;
 594                }
 595                coff = v_cluster - le32_to_cpu(rec->e_cpos);
 596                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 597                                                    le64_to_cpu(rec->e_blkno));
 598                *p_cluster = *p_cluster + coff;
 599                if (num_clusters)
 600                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 601
 602                if (extent_flags)
 603                        *extent_flags = rec->e_flags;
 604        }
 605out:
 606        if (eb_bh)
 607                brelse(eb_bh);
 608        return ret;
 609}
 610
/*
 * Map logical cluster v_cluster of an inode to its physical cluster.
 *
 * Tries the in-memory extent map first; on a miss, reads the inode
 * block, searches the extent tree, and caches the hit.  A hole is
 * reported as *p_cluster == 0 with *num_clusters (when requested) set
 * to the hole length.  Returns 0 or negative errno; -ERANGE for
 * inline-data inodes, which have no cluster mapping.
 */
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int uninitialized_var(hole_len), flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	/* Inline data lives in the inode block; no clusters to map. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	/* Fast path: the mapping may already be cached. */
	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		/* Cache the record so later lookups take the fast path. */
		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}
 669
 670/*
 671 * This expects alloc_sem to be held. The allocation cannot change at
 672 * all while the map is in the process of being updated.
 673 */
 674int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 675                                u64 *ret_count, unsigned int *extent_flags)
 676{
 677        int ret;
 678        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 679        u32 cpos, num_clusters, p_cluster;
 680        u64 boff = 0;
 681
 682        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 683
 684        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 685                                 extent_flags);
 686        if (ret) {
 687                mlog_errno(ret);
 688                goto out;
 689        }
 690
 691        /*
 692         * p_cluster == 0 indicates a hole.
 693         */
 694        if (p_cluster) {
 695                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 696                boff += (v_blkno & (u64)(bpc - 1));
 697        }
 698
 699        *p_blkno = boff;
 700
 701        if (ret_count) {
 702                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 703                *ret_count -= v_blkno & (u64)(bpc - 1);
 704        }
 705
 706out:
 707        return ret;
 708}
 709
/*
 * The ocfs2_fiemap_inline() may be a little bit misleading, since
 * it not only handles the fiemap for inlined files, but also deals
 * with the fast symlink, cause they have no difference for extent
 * mapping per se.
 *
 * When map_start falls inside the inline data area, reports the data
 * bytes inside the inode block as one DATA_INLINE|LAST extent.
 * Returns 0, or a negative error from fiemap_fill_next_extent().
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		/*
		 * Physical byte address of the inline payload within
		 * the on-disk inode block.
		 */
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}
 749
 750#define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
 751
/*
 * Report extent mappings for the byte range [map_start, map_start +
 * map_len) via fiemap.  Inline-data inodes and fast symlinks are
 * delegated to ocfs2_fiemap_inline(); everything else walks the
 * extent tree record by record under the cluster lock and alloc_sem.
 */
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		/* Holes are skipped, not reported. */
		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
	}

	/*
	 * fiemap_fill_next_extent() returns 1 when the user buffer is
	 * full; that is success, not an error.
	 */
	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:

	return ret;
}
 835
/*
 * Is IO overwriting allocated blocks?
 *
 * Returns 0 when the byte range [map_start, map_start + map_len) is
 * fully covered by allocated, non-refcounted extents (so a write
 * needs no new allocation or copy-on-write), -EAGAIN when it is not,
 * or a negative errno if the extent lookup fails.
 */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	/* Inline data: an overwrite is possible only if it stays inline. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* A hole means a write would need new allocation. */
		if (rec.e_blkno == 0ULL)
			break;

		/* A shared extent means a write would need CoW. */
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

	/* Stopped before covering the range -> not a pure overwrite. */
	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}
 879
/*
 * Implement SEEK_DATA/SEEK_HOLE (lseek(2)) for ocfs2.
 *
 * On success, *offset is advanced (never moved backwards) to the start
 * of the next data or hole region at or after the original *offset.
 * Returns 0 on success, -ENXIO if *offset is at or past i_size or (for
 * SEEK_DATA) no further data exists, or a negative error from locking /
 * extent lookup.
 *
 * Lock order: cluster inode lock first, then ip_alloc_sem (read);
 * released in the reverse order on the way out.
 */
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	/* The VFS dispatches only these two whence values to us. */
	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/* Seeking at or beyond EOF always fails with -ENXIO. */
	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	/*
	 * Inline-data files are a single data region [0, i_size): any
	 * in-range offset is data, and the only hole is the implicit
	 * one at EOF.
	 */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	/* Scan extent records cluster-by-cluster until a match or EOF. */
	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		/* Byte offset of the current cluster position. */
		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			/* No backing extent: a hole of hole_size clusters. */
			clen = hole_size;
			is_data = 0;
		} else {
			/*
			 * Clusters remaining in this extent from cpos on.
			 * Unwritten (preallocated) extents read as zeros,
			 * so they count as holes for seek purposes.
			 */
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
		}

		/* Region type matches what the caller is seeking? */
		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			/* Never move the offset backwards. */
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	/*
	 * Fell off the end of the extent list without a match.  For
	 * SEEK_HOLE, the implicit hole at EOF qualifies: clamp the last
	 * region (cpos/clen carried over from the loop) to i_size and
	 * report its end.
	 */
	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<=  cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	/* SEEK_DATA with no data found before EOF. */
	ret = -ENXIO;

out_unlock:

	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}
 973
 974int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 975                           struct buffer_head *bhs[], int flags,
 976                           int (*validate)(struct super_block *sb,
 977                                           struct buffer_head *bh))
 978{
 979        int rc = 0;
 980        u64 p_block, p_count;
 981        int i, count, done = 0;
 982
 983        trace_ocfs2_read_virt_blocks(
 984             inode, (unsigned long long)v_block, nr, bhs, flags,
 985             validate);
 986
 987        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 988            i_size_read(inode)) {
 989                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 990                goto out;
 991        }
 992
 993        while (done < nr) {
 994                down_read(&OCFS2_I(inode)->ip_alloc_sem);
 995                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 996                                                 &p_block, &p_count, NULL);
 997                up_read(&OCFS2_I(inode)->ip_alloc_sem);
 998                if (rc) {
 999                        mlog_errno(rc);
1000                        break;
1001                }
1002
1003                if (!p_block) {
1004                        rc = -EIO;
1005                        mlog(ML_ERROR,
1006                             "Inode #%llu contains a hole at offset %llu\n",
1007                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
1008                             (unsigned long long)(v_block + done) <<
1009                             inode->i_sb->s_blocksize_bits);
1010                        break;
1011                }
1012
1013                count = nr - done;
1014                if (p_count < count)
1015                        count = p_count;
1016
1017                /*
1018                 * If the caller passed us bhs, they should have come
1019                 * from a previous readahead call to this function.  Thus,
1020                 * they should have the right b_blocknr.
1021                 */
1022                for (i = 0; i < count; i++) {
1023                        if (!bhs[done + i])
1024                                continue;
1025                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
1026                }
1027
1028                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
1029                                       bhs + done, flags, validate);
1030                if (rc) {
1031                        mlog_errno(rc);
1032                        break;
1033                }
1034                done += count;
1035        }
1036
1037out:
1038        return rc;
1039}
1040
1041
1042