linux/drivers/staging/lustre/lustre/lov/lov_pack.c
<<
>>
Prefs
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19 *
  20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21 * CA 95054 USA or visit www.sun.com if you need additional information or
  22 * have any questions.
  23 *
  24 * GPL HEADER END
  25 */
  26/*
  27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  28 * Use is subject to license terms.
  29 *
  30 * Copyright (c) 2011, 2012, Intel Corporation.
  31 */
  32/*
  33 * This file is part of Lustre, http://www.lustre.org/
  34 * Lustre is a trademark of Sun Microsystems, Inc.
  35 *
  36 * lustre/lov/lov_pack.c
  37 *
  38 * (Un)packing of OST/MDS requests
  39 *
  40 * Author: Andreas Dilger <adilger@clusterfs.com>
  41 */
  42
  43#define DEBUG_SUBSYSTEM S_LOV
  44
  45#include "../include/lustre_net.h"
  46#include "../include/obd.h"
  47#include "../include/obd_class.h"
  48#include "../include/obd_support.h"
  49#include "../include/lustre/lustre_user.h"
  50
  51#include "lov_internal.h"
  52
  53void lov_dump_lmm_common(int level, void *lmmp)
  54{
  55        struct lov_mds_md *lmm = lmmp;
  56        struct ost_id   oi;
  57
  58        lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi);
  59        CDEBUG(level, "objid "DOSTID", magic 0x%08x, pattern %#x\n",
  60               POSTID(&oi), le32_to_cpu(lmm->lmm_magic),
  61               le32_to_cpu(lmm->lmm_pattern));
  62        CDEBUG(level, "stripe_size %u, stripe_count %u, layout_gen %u\n",
  63               le32_to_cpu(lmm->lmm_stripe_size),
  64               le16_to_cpu(lmm->lmm_stripe_count),
  65               le16_to_cpu(lmm->lmm_layout_gen));
  66}
  67
  68static void lov_dump_lmm_objects(int level, struct lov_ost_data *lod,
  69                                 int stripe_count)
  70{
  71        int i;
  72
  73        if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
  74                CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n",
  75                       stripe_count, LOV_V1_INSANE_STRIPE_COUNT);
  76                return;
  77        }
  78
  79        for (i = 0; i < stripe_count; ++i, ++lod) {
  80                struct ost_id   oi;
  81
  82                ostid_le_to_cpu(&lod->l_ost_oi, &oi);
  83                CDEBUG(level, "stripe %u idx %u subobj "DOSTID"\n", i,
  84                       le32_to_cpu(lod->l_ost_idx), POSTID(&oi));
  85        }
  86}
  87
  88void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm)
  89{
  90        lov_dump_lmm_common(level, lmm);
  91        lov_dump_lmm_objects(level, lmm->lmm_objects,
  92                             le16_to_cpu(lmm->lmm_stripe_count));
  93}
  94
  95void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
  96{
  97        lov_dump_lmm_common(level, lmm);
  98        CDEBUG(level, "pool_name "LOV_POOLNAMEF"\n", lmm->lmm_pool_name);
  99        lov_dump_lmm_objects(level, lmm->lmm_objects,
 100                             le16_to_cpu(lmm->lmm_stripe_count));
 101}
 102
 103void lov_dump_lmm(int level, void *lmm)
 104{
 105        int magic;
 106
 107        magic = le32_to_cpu(((struct lov_mds_md *)lmm)->lmm_magic);
 108        switch (magic) {
 109        case LOV_MAGIC_V1:
 110                lov_dump_lmm_v1(level, (struct lov_mds_md_v1 *)lmm);
 111                break;
 112        case LOV_MAGIC_V3:
 113                lov_dump_lmm_v3(level, (struct lov_mds_md_v3 *)lmm);
 114                break;
 115        default:
 116                CDEBUG(level, "unrecognized lmm_magic %x, assuming %x\n",
 117                       magic, LOV_MAGIC_V1);
 118                lov_dump_lmm_common(level, lmm);
 119                break;
 120        }
 121}
 122
 123/* Pack LOV object metadata for disk storage.  It is packed in LE byte
 124 * order and is opaque to the networking layer.
 125 *
 126 * XXX In the future, this will be enhanced to get the EA size from the
 127 *     underlying OSC device(s) to get their EA sizes so we can stack
 128 *     LOVs properly.  For now lov_mds_md_size() just assumes one u64
 129 *     per stripe.
 130 */
 131int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 132               struct lov_stripe_md *lsm)
 133{
 134        struct obd_device *obd = class_exp2obd(exp);
 135        struct lov_obd *lov = &obd->u.lov;
 136        struct lov_mds_md_v1 *lmmv1;
 137        struct lov_mds_md_v3 *lmmv3;
 138        __u16 stripe_count;
 139        struct lov_ost_data_v1 *lmm_objects;
 140        int lmm_size, lmm_magic;
 141        int i;
 142        int cplen = 0;
 143
 144        if (lsm) {
 145                lmm_magic = lsm->lsm_magic;
 146        } else {
 147                if (lmmp && *lmmp)
 148                        lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
 149                else
 150                        /* lsm == NULL and lmmp == NULL */
 151                        lmm_magic = LOV_MAGIC;
 152        }
 153
 154        if ((lmm_magic != LOV_MAGIC_V1) &&
 155            (lmm_magic != LOV_MAGIC_V3)) {
 156                CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
 157                        lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
 158                return -EINVAL;
 159
 160        }
 161
 162        if (lsm) {
 163                /* If we are just sizing the EA, limit the stripe count
 164                 * to the actual number of OSTs in this filesystem. */
 165                if (!lmmp) {
 166                        stripe_count = lov_get_stripecnt(lov, lmm_magic,
 167                                                        lsm->lsm_stripe_count);
 168                        lsm->lsm_stripe_count = stripe_count;
 169                } else if (!lsm_is_released(lsm)) {
 170                        stripe_count = lsm->lsm_stripe_count;
 171                } else {
 172                        stripe_count = 0;
 173                }
 174        } else {
 175                /* No need to allocate more than maximum supported stripes.
 176                 * Anyway, this is pretty inaccurate since ld_tgt_count now
 177                 * represents max index and we should rely on the actual number
 178                 * of OSTs instead */
 179                stripe_count = lov_mds_md_max_stripe_count(
 180                        lov->lov_ocd.ocd_max_easize, lmm_magic);
 181
 182                if (stripe_count > lov->desc.ld_tgt_count)
 183                        stripe_count = lov->desc.ld_tgt_count;
 184        }
 185
 186        /* XXX LOV STACKING call into osc for sizes */
 187        lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
 188
 189        if (!lmmp)
 190                return lmm_size;
 191
 192        if (*lmmp && !lsm) {
 193                stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count);
 194                lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
 195                OBD_FREE_LARGE(*lmmp, lmm_size);
 196                *lmmp = NULL;
 197                return 0;
 198        }
 199
 200        if (!*lmmp) {
 201                OBD_ALLOC_LARGE(*lmmp, lmm_size);
 202                if (!*lmmp)
 203                        return -ENOMEM;
 204        }
 205
 206        CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
 207               lmm_magic, lmm_size);
 208
 209        lmmv1 = *lmmp;
 210        lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
 211        if (lmm_magic == LOV_MAGIC_V3)
 212                lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
 213        else
 214                lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);
 215
 216        if (!lsm)
 217                return lmm_size;
 218
 219        /* lmmv1 and lmmv3 point to the same struct and have the
 220         * same first fields
 221         */
 222        lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi);
 223        lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
 224        lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count);
 225        lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
 226        lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
 227        if (lsm->lsm_magic == LOV_MAGIC_V3) {
 228                cplen = strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
 229                                sizeof(lmmv3->lmm_pool_name));
 230                if (cplen >= sizeof(lmmv3->lmm_pool_name))
 231                        return -E2BIG;
 232                lmm_objects = lmmv3->lmm_objects;
 233        } else {
 234                lmm_objects = lmmv1->lmm_objects;
 235        }
 236
 237        for (i = 0; i < stripe_count; i++) {
 238                struct lov_oinfo *loi = lsm->lsm_oinfo[i];
 239                /* XXX LOV STACKING call down to osc_packmd() to do packing */
 240                LASSERTF(ostid_id(&loi->loi_oi) != 0, "lmm_oi "DOSTID
 241                         " stripe %u/%u idx %u\n", POSTID(&lmmv1->lmm_oi),
 242                         i, stripe_count, loi->loi_ost_idx);
 243                ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
 244                lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
 245                lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
 246        }
 247
 248        return lmm_size;
 249}
 250
 251/* Find the max stripecount we should use */
 252__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
 253{
 254        __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
 255
 256        if (!stripe_count)
 257                stripe_count = lov->desc.ld_default_stripe_count;
 258        if (stripe_count > lov->desc.ld_active_tgt_count)
 259                stripe_count = lov->desc.ld_active_tgt_count;
 260        if (!stripe_count)
 261                stripe_count = 1;
 262
 263        /* stripe count is based on whether ldiskfs can handle
 264         * larger EA sizes */
 265        if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE &&
 266            lov->lov_ocd.ocd_max_easize)
 267                max_stripes = lov_mds_md_max_stripe_count(
 268                        lov->lov_ocd.ocd_max_easize, magic);
 269
 270        if (stripe_count > max_stripes)
 271                stripe_count = max_stripes;
 272
 273        return stripe_count;
 274}
 275
 276
 277static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count)
 278{
 279        int rc;
 280
 281        if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
 282                char *buffer;
 283                int sz;
 284
 285                CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
 286                       le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
 287                sz = lmm_bytes * 2 + 1;
 288                OBD_ALLOC_LARGE(buffer, sz);
 289                if (buffer != NULL) {
 290                        int i;
 291
 292                        for (i = 0; i < lmm_bytes; i++)
 293                                sprintf(buffer+2*i, "%.2X", ((char *)lmm)[i]);
 294                        buffer[sz - 1] = '\0';
 295                        CERROR("%s\n", buffer);
 296                        OBD_FREE_LARGE(buffer, sz);
 297                }
 298                return -EINVAL;
 299        }
 300        rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
 301                                     lmm_bytes, stripe_count);
 302        return rc;
 303}
 304
 305int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
 306                    int pattern, int magic)
 307{
 308        int i, lsm_size;
 309
 310        CDEBUG(D_INFO, "alloc lsm, stripe_count %d\n", stripe_count);
 311
 312        *lsmp = lsm_alloc_plain(stripe_count, &lsm_size);
 313        if (!*lsmp) {
 314                CERROR("can't allocate lsmp stripe_count %d\n", stripe_count);
 315                return -ENOMEM;
 316        }
 317
 318        atomic_set(&(*lsmp)->lsm_refc, 1);
 319        spin_lock_init(&(*lsmp)->lsm_lock);
 320        (*lsmp)->lsm_magic = magic;
 321        (*lsmp)->lsm_stripe_count = stripe_count;
 322        (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
 323        (*lsmp)->lsm_pattern = pattern;
 324        (*lsmp)->lsm_pool_name[0] = '\0';
 325        (*lsmp)->lsm_layout_gen = 0;
 326        if (stripe_count > 0)
 327                (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
 328
 329        for (i = 0; i < stripe_count; i++)
 330                loi_init((*lsmp)->lsm_oinfo[i]);
 331
 332        return lsm_size;
 333}
 334
 335int lov_free_memmd(struct lov_stripe_md **lsmp)
 336{
 337        struct lov_stripe_md *lsm = *lsmp;
 338        int refc;
 339
 340        *lsmp = NULL;
 341        LASSERT(atomic_read(&lsm->lsm_refc) > 0);
 342        refc = atomic_dec_return(&lsm->lsm_refc);
 343        if (refc == 0) {
 344                LASSERT(lsm_op_find(lsm->lsm_magic) != NULL);
 345                lsm_op_find(lsm->lsm_magic)->lsm_free(lsm);
 346        }
 347        return refc;
 348}
 349
 350
 351/* Unpack LOV object metadata from disk storage.  It is packed in LE byte
 352 * order and is opaque to the networking layer.
 353 */
 354int lov_unpackmd(struct obd_export *exp,  struct lov_stripe_md **lsmp,
 355                 struct lov_mds_md *lmm, int lmm_bytes)
 356{
 357        struct obd_device *obd = class_exp2obd(exp);
 358        struct lov_obd *lov = &obd->u.lov;
 359        int rc = 0, lsm_size;
 360        __u16 stripe_count;
 361        __u32 magic;
 362        __u32 pattern;
 363
 364        /* If passed an MDS struct use values from there, otherwise defaults */
 365        if (lmm) {
 366                rc = lov_verify_lmm(lmm, lmm_bytes, &stripe_count);
 367                if (rc)
 368                        return rc;
 369                magic = le32_to_cpu(lmm->lmm_magic);
 370        } else {
 371                magic = LOV_MAGIC;
 372                stripe_count = lov_get_stripecnt(lov, magic, 0);
 373        }
 374
 375        /* If we aren't passed an lsmp struct, we just want the size */
 376        if (!lsmp) {
 377                /* XXX LOV STACKING call into osc for sizes */
 378                LBUG();
 379                return lov_stripe_md_size(stripe_count);
 380        }
 381        /* If we are passed an allocated struct but nothing to unpack, free */
 382        if (*lsmp && !lmm) {
 383                lov_free_memmd(lsmp);
 384                return 0;
 385        }
 386
 387        pattern = le32_to_cpu(lmm->lmm_pattern);
 388        lsm_size = lov_alloc_memmd(lsmp, stripe_count, pattern, magic);
 389        if (lsm_size < 0)
 390                return lsm_size;
 391
 392        /* If we are passed a pointer but nothing to unpack, we only alloc */
 393        if (!lmm)
 394                return lsm_size;
 395
 396        LASSERT(lsm_op_find(magic) != NULL);
 397        rc = lsm_op_find(magic)->lsm_unpackmd(lov, *lsmp, lmm);
 398        if (rc) {
 399                lov_free_memmd(lsmp);
 400                return rc;
 401        }
 402
 403        return lsm_size;
 404}
 405
 406/* Retrieve object striping information.
 407 *
 408 * @lump is a pointer to an in-core struct with lmm_ost_count indicating
 409 * the maximum number of OST indices which will fit in the user buffer.
 410 * lmm_magic must be LOV_USER_MAGIC.
 411 */
 412int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 413                  struct lov_user_md *lump)
 414{
 415        /*
 416         * XXX huge struct allocated on stack.
 417         */
 418        /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
 419        struct lov_user_md_v3 lum;
 420        struct lov_mds_md *lmmk = NULL;
 421        int rc, lmm_size;
 422        int lum_size;
 423        mm_segment_t seg;
 424
 425        if (!lsm)
 426                return -ENODATA;
 427
 428        /*
 429         * "Switch to kernel segment" to allow copying from kernel space by
 430         * copy_{to,from}_user().
 431         */
 432        seg = get_fs();
 433        set_fs(KERNEL_DS);
 434
 435        /* we only need the header part from user space to get lmm_magic and
 436         * lmm_stripe_count, (the header part is common to v1 and v3) */
 437        lum_size = sizeof(struct lov_user_md_v1);
 438        if (copy_from_user(&lum, lump, lum_size)) {
 439                rc = -EFAULT;
 440                goto out_set;
 441        } else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
 442                 (lum.lmm_magic != LOV_USER_MAGIC_V3)) {
 443                rc = -EINVAL;
 444                goto out_set;
 445        }
 446
 447        if (lum.lmm_stripe_count &&
 448            (lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
 449                /* Return right size of stripe to user */
 450                lum.lmm_stripe_count = lsm->lsm_stripe_count;
 451                rc = copy_to_user(lump, &lum, lum_size);
 452                rc = -EOVERFLOW;
 453                goto out_set;
 454        }
 455        rc = lov_packmd(exp, &lmmk, lsm);
 456        if (rc < 0)
 457                goto out_set;
 458        lmm_size = rc;
 459        rc = 0;
 460
 461        /* FIXME: Bug 1185 - copy fields properly when structs change */
 462        /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
 463        CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3));
 464        CLASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lmmk->lmm_objects[0]));
 465
 466        if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
 467            ((lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) ||
 468            (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)))) {
 469                lustre_swab_lov_mds_md(lmmk);
 470                lustre_swab_lov_user_md_objects(
 471                                (struct lov_user_ost_data *)lmmk->lmm_objects,
 472                                lmmk->lmm_stripe_count);
 473        }
 474        if (lum.lmm_magic == LOV_USER_MAGIC) {
 475                /* User request for v1, we need skip lmm_pool_name */
 476                if (lmmk->lmm_magic == LOV_MAGIC_V3) {
 477                        memmove((char *)(&lmmk->lmm_stripe_count) +
 478                                sizeof(lmmk->lmm_stripe_count),
 479                                ((struct lov_mds_md_v3 *)lmmk)->lmm_objects,
 480                                lmmk->lmm_stripe_count *
 481                                sizeof(struct lov_ost_data_v1));
 482                        lmm_size -= LOV_MAXPOOLNAME;
 483                }
 484        } else {
 485                /* if v3 we just have to update the lum_size */
 486                lum_size = sizeof(struct lov_user_md_v3);
 487        }
 488
 489        /* User wasn't expecting this many OST entries */
 490        if (lum.lmm_stripe_count == 0)
 491                lmm_size = lum_size;
 492        else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
 493                rc = -EOVERFLOW;
 494                goto out_set;
 495        }
 496        /*
 497         * Have a difference between lov_mds_md & lov_user_md.
 498         * So we have to re-order the data before copy to user.
 499         */
 500        lum.lmm_stripe_count = lmmk->lmm_stripe_count;
 501        lum.lmm_layout_gen = lmmk->lmm_layout_gen;
 502        ((struct lov_user_md *)lmmk)->lmm_layout_gen = lum.lmm_layout_gen;
 503        ((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count;
 504        if (copy_to_user(lump, lmmk, lmm_size))
 505                rc = -EFAULT;
 506
 507        obd_free_diskmd(exp, &lmmk);
 508out_set:
 509        set_fs(seg);
 510        return rc;
 511}
 512