linux/net/ceph/striper.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2
   3#include <linux/ceph/ceph_debug.h>
   4
   5#include <linux/math64.h>
   6#include <linux/slab.h>
   7
   8#include <linux/ceph/striper.h>
   9#include <linux/ceph/types.h>
  10
  11/*
  12 * Map a file extent to a stripe unit within an object.
  13 * Fill in objno, offset into object, and object extent length (i.e. the
  14 * number of bytes mapped, less than or equal to @l->stripe_unit).
  15 *
  16 * Example for stripe_count = 3, stripes_per_object = 4:
  17 *
  18 * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
  19 * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
  20 * stripepos |      0      |      1      |      2      |      0      |      1
  21 * objno     |      0      |      1      |      2      |      3      |      4
  22 * objsetno  |                    0                    |                    1
  23 */
  24void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
  25                                   u64 off, u64 len,
  26                                   u64 *objno, u64 *objoff, u32 *xlen)
  27{
  28        u32 stripes_per_object = l->object_size / l->stripe_unit;
  29        u64 blockno;    /* which su in the file (i.e. globally) */
  30        u32 blockoff;   /* offset into su */
  31        u64 stripeno;   /* which stripe */
  32        u32 stripepos;  /* which su in the stripe,
  33                           which object in the object set */
  34        u64 objsetno;   /* which object set */
  35        u32 objsetpos;  /* which stripe in the object set */
  36
  37        blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
  38        stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
  39        objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
  40
  41        *objno = objsetno * l->stripe_count + stripepos;
  42        *objoff = objsetpos * l->stripe_unit + blockoff;
  43        *xlen = min_t(u64, len, l->stripe_unit - blockoff);
  44}
  45EXPORT_SYMBOL(ceph_calc_file_object_mapping);
  46
  47/*
  48 * Return the last extent with given objno (@object_extents is sorted
  49 * by objno).  If not found, return NULL and set @add_pos so that the
  50 * new extent can be added with list_add(add_pos, new_ex).
  51 */
  52static struct ceph_object_extent *
  53lookup_last(struct list_head *object_extents, u64 objno,
  54            struct list_head **add_pos)
  55{
  56        struct list_head *pos;
  57
  58        list_for_each_prev(pos, object_extents) {
  59                struct ceph_object_extent *ex =
  60                    list_entry(pos, typeof(*ex), oe_item);
  61
  62                if (ex->oe_objno == objno)
  63                        return ex;
  64
  65                if (ex->oe_objno < objno)
  66                        break;
  67        }
  68
  69        *add_pos = pos;
  70        return NULL;
  71}
  72
  73static struct ceph_object_extent *
  74lookup_containing(struct list_head *object_extents, u64 objno,
  75                  u64 objoff, u32 xlen)
  76{
  77        struct ceph_object_extent *ex;
  78
  79        list_for_each_entry(ex, object_extents, oe_item) {
  80                if (ex->oe_objno == objno &&
  81                    ex->oe_off <= objoff &&
  82                    ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
  83                        return ex;
  84
  85                if (ex->oe_objno > objno)
  86                        break;
  87        }
  88
  89        return NULL;
  90}
  91
  92/*
  93 * Map a file extent to a sorted list of object extents.
  94 *
  95 * We want only one (or as few as possible) object extents per object.
  96 * Adjacent object extents will be merged together, each returned object
  97 * extent may reverse map to multiple different file extents.
  98 *
  99 * Call @alloc_fn for each new object extent and @action_fn for each
 100 * mapped stripe unit, whether it was merged into an already allocated
 101 * object extent or started a new object extent.
 102 *
 103 * Newly allocated object extents are added to @object_extents.
 104 * To keep @object_extents sorted, successive calls to this function
 105 * must map successive file extents (i.e. the list of file extents that
 106 * are mapped using the same @object_extents must be sorted).
 107 *
 108 * The caller is responsible for @object_extents.
 109 */
 110int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
 111                         struct list_head *object_extents,
 112                         struct ceph_object_extent *alloc_fn(void *arg),
 113                         void *alloc_arg,
 114                         ceph_object_extent_fn_t action_fn,
 115                         void *action_arg)
 116{
 117        struct ceph_object_extent *last_ex, *ex;
 118
 119        while (len) {
 120                struct list_head *add_pos = NULL;
 121                u64 objno, objoff;
 122                u32 xlen;
 123
 124                ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
 125                                              &xlen);
 126
 127                last_ex = lookup_last(object_extents, objno, &add_pos);
 128                if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
 129                        ex = alloc_fn(alloc_arg);
 130                        if (!ex)
 131                                return -ENOMEM;
 132
 133                        ex->oe_objno = objno;
 134                        ex->oe_off = objoff;
 135                        ex->oe_len = xlen;
 136                        if (action_fn)
 137                                action_fn(ex, xlen, action_arg);
 138
 139                        if (!last_ex)
 140                                list_add(&ex->oe_item, add_pos);
 141                        else
 142                                list_add(&ex->oe_item, &last_ex->oe_item);
 143                } else {
 144                        last_ex->oe_len += xlen;
 145                        if (action_fn)
 146                                action_fn(last_ex, xlen, action_arg);
 147                }
 148
 149                off += xlen;
 150                len -= xlen;
 151        }
 152
 153        for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
 154             ex = list_next_entry(last_ex, oe_item);
 155             &ex->oe_item != object_extents;
 156             last_ex = ex, ex = list_next_entry(ex, oe_item)) {
 157                if (last_ex->oe_objno > ex->oe_objno ||
 158                    (last_ex->oe_objno == ex->oe_objno &&
 159                     last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
 160                        WARN(1, "%s: object_extents list not sorted!\n",
 161                             __func__);
 162                        return -EINVAL;
 163                }
 164        }
 165
 166        return 0;
 167}
 168EXPORT_SYMBOL(ceph_file_to_extents);
 169
 170/*
 171 * A stripped down, non-allocating version of ceph_file_to_extents(),
 172 * for when @object_extents is already populated.
 173 */
 174int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
 175                         struct list_head *object_extents,
 176                         ceph_object_extent_fn_t action_fn,
 177                         void *action_arg)
 178{
 179        while (len) {
 180                struct ceph_object_extent *ex;
 181                u64 objno, objoff;
 182                u32 xlen;
 183
 184                ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
 185                                              &xlen);
 186
 187                ex = lookup_containing(object_extents, objno, objoff, xlen);
 188                if (!ex) {
 189                        WARN(1, "%s: objno %llu %llu~%u not found!\n",
 190                             __func__, objno, objoff, xlen);
 191                        return -EINVAL;
 192                }
 193
 194                action_fn(ex, xlen, action_arg);
 195
 196                off += xlen;
 197                len -= xlen;
 198        }
 199
 200        return 0;
 201}
 202EXPORT_SYMBOL(ceph_iterate_extents);
 203
 204/*
 205 * Reverse map an object extent to a sorted list of file extents.
 206 *
 207 * On success, the caller is responsible for:
 208 *
 209 *     kfree(file_extents)
 210 */
 211int ceph_extent_to_file(struct ceph_file_layout *l,
 212                        u64 objno, u64 objoff, u64 objlen,
 213                        struct ceph_file_extent **file_extents,
 214                        u32 *num_file_extents)
 215{
 216        u32 stripes_per_object = l->object_size / l->stripe_unit;
 217        u64 blockno;    /* which su */
 218        u32 blockoff;   /* offset into su */
 219        u64 stripeno;   /* which stripe */
 220        u32 stripepos;  /* which su in the stripe,
 221                           which object in the object set */
 222        u64 objsetno;   /* which object set */
 223        u32 i = 0;
 224
 225        if (!objlen) {
 226                *file_extents = NULL;
 227                *num_file_extents = 0;
 228                return 0;
 229        }
 230
 231        *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
 232                                     DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
 233        *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
 234                                      GFP_NOIO);
 235        if (!*file_extents)
 236                return -ENOMEM;
 237
 238        div_u64_rem(objoff, l->stripe_unit, &blockoff);
 239        while (objlen) {
 240                u64 off, len;
 241
 242                objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
 243                stripeno = div_u64(objoff, l->stripe_unit) +
 244                                                objsetno * stripes_per_object;
 245                blockno = stripeno * l->stripe_count + stripepos;
 246                off = blockno * l->stripe_unit + blockoff;
 247                len = min_t(u64, objlen, l->stripe_unit - blockoff);
 248
 249                (*file_extents)[i].fe_off = off;
 250                (*file_extents)[i].fe_len = len;
 251
 252                blockoff = 0;
 253                objoff += len;
 254                objlen -= len;
 255                i++;
 256        }
 257
 258        BUG_ON(i != *num_file_extents);
 259        return 0;
 260}
 261EXPORT_SYMBOL(ceph_extent_to_file);
 262