linux/drivers/staging/lustre/lustre/lov/lov_pool.c
<<
>>
Prefs
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see [sun.com URL with a
  18 * copy of GPLv2].
  19 *
  20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21 * CA 95054 USA or visit www.sun.com if you need additional information or
  22 * have any questions.
  23 *
  24 * GPL HEADER END
  25 */
  26/*
  27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  28 * Use is subject to license terms.
  29 *
  30 * Copyright (c) 2012, Intel Corporation.
  31 */
  32/*
  33 * This file is part of Lustre, http://www.lustre.org/
  34 * Lustre is a trademark of Sun Microsystems, Inc.
  35 *
  36 * lustre/lov/lov_pool.c
  37 *
  38 * OST pool methods
  39 *
  40 * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
  41 * Author: Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
  42 * Author: Nathaniel Rutman <Nathan.Rutman@Sun.COM>
  43 */
  44
  45#define DEBUG_SUBSYSTEM S_LOV
  46
  47#include "../../include/linux/libcfs/libcfs.h"
  48
  49#include "../include/obd.h"
  50#include "lov_internal.h"
  51
  52#define pool_tgt(_p, _i) \
  53                _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
  54
  55static void lov_pool_getref(struct pool_desc *pool)
  56{
  57        CDEBUG(D_INFO, "pool %p\n", pool);
  58        atomic_inc(&pool->pool_refcount);
  59}
  60
  61void lov_pool_putref(struct pool_desc *pool)
  62{
  63        CDEBUG(D_INFO, "pool %p\n", pool);
  64        if (atomic_dec_and_test(&pool->pool_refcount)) {
  65                LASSERT(hlist_unhashed(&pool->pool_hash));
  66                LASSERT(list_empty(&pool->pool_list));
  67                LASSERT(pool->pool_proc_entry == NULL);
  68                lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
  69                lov_ost_pool_free(&(pool->pool_obds));
  70                OBD_FREE_PTR(pool);
  71        }
  72}
  73
  74static void lov_pool_putref_locked(struct pool_desc *pool)
  75{
  76        CDEBUG(D_INFO, "pool %p\n", pool);
  77        LASSERT(atomic_read(&pool->pool_refcount) > 1);
  78
  79        atomic_dec(&pool->pool_refcount);
  80}
  81
  82/*
  83 * hash function using a Rotating Hash algorithm
  84 * Knuth, D. The Art of Computer Programming,
  85 * Volume 3: Sorting and Searching,
  86 * Chapter 6.4.
  87 * Addison Wesley, 1973
  88 */
  89static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key, unsigned mask)
  90{
  91        int i;
  92        __u32 result;
  93        char *poolname;
  94
  95        result = 0;
  96        poolname = (char *)key;
  97        for (i = 0; i < LOV_MAXPOOLNAME; i++) {
  98                if (poolname[i] == '\0')
  99                        break;
 100                result = (result << 4)^(result >> 28) ^  poolname[i];
 101        }
 102        return (result % mask);
 103}
 104
 105static void *pool_key(struct hlist_node *hnode)
 106{
 107        struct pool_desc *pool;
 108
 109        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
 110        return pool->pool_name;
 111}
 112
 113static int pool_hashkey_keycmp(const void *key, struct hlist_node *compared_hnode)
 114{
 115        char *pool_name;
 116        struct pool_desc *pool;
 117
 118        pool_name = (char *)key;
 119        pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
 120        return !strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
 121}
 122
 123static void *pool_hashobject(struct hlist_node *hnode)
 124{
 125        return hlist_entry(hnode, struct pool_desc, pool_hash);
 126}
 127
 128static void pool_hashrefcount_get(struct cfs_hash *hs, struct hlist_node *hnode)
 129{
 130        struct pool_desc *pool;
 131
 132        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
 133        lov_pool_getref(pool);
 134}
 135
 136static void pool_hashrefcount_put_locked(struct cfs_hash *hs,
 137                                         struct hlist_node *hnode)
 138{
 139        struct pool_desc *pool;
 140
 141        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
 142        lov_pool_putref_locked(pool);
 143}
 144
 145cfs_hash_ops_t pool_hash_operations = {
 146        .hs_hash        = pool_hashfn,
 147        .hs_key  = pool_key,
 148        .hs_keycmp      = pool_hashkey_keycmp,
 149        .hs_object      = pool_hashobject,
 150        .hs_get  = pool_hashrefcount_get,
 151        .hs_put_locked  = pool_hashrefcount_put_locked,
 152
 153};
 154
 155#if defined (CONFIG_PROC_FS)
 156/* ifdef needed for liblustre support */
 157/*
 158 * pool /proc seq_file methods
 159 */
 160/*
 161 * iterator is used to go through the target pool entries
 162 * index is the current entry index in the lp_array[] array
 163 * index >= pos returned to the seq_file interface
 164 * pos is from 0 to (pool->pool_obds.op_count - 1)
 165 */
 166#define POOL_IT_MAGIC 0xB001CEA0
 167struct pool_iterator {
 168        int magic;
 169        struct pool_desc *pool;
 170        int idx;        /* from 0 to pool_tgt_size - 1 */
 171};
 172
 173static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
 174{
 175        struct pool_iterator *iter = (struct pool_iterator *)s->private;
 176        int prev_idx;
 177
 178        LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
 179
 180        /* test if end of file */
 181        if (*pos >= pool_tgt_count(iter->pool))
 182                return NULL;
 183
 184        /* iterate to find a non empty entry */
 185        prev_idx = iter->idx;
 186        down_read(&pool_tgt_rw_sem(iter->pool));
 187        iter->idx++;
 188        if (iter->idx == pool_tgt_count(iter->pool)) {
 189                iter->idx = prev_idx; /* we stay on the last entry */
 190                up_read(&pool_tgt_rw_sem(iter->pool));
 191                return NULL;
 192        }
 193        up_read(&pool_tgt_rw_sem(iter->pool));
 194        (*pos)++;
 195        /* return != NULL to continue */
 196        return iter;
 197}
 198
 199static void *pool_proc_start(struct seq_file *s, loff_t *pos)
 200{
 201        struct pool_desc *pool = (struct pool_desc *)s->private;
 202        struct pool_iterator *iter;
 203
 204        lov_pool_getref(pool);
 205        if ((pool_tgt_count(pool) == 0) ||
 206            (*pos >= pool_tgt_count(pool))) {
 207                /* iter is not created, so stop() has no way to
 208                 * find pool to dec ref */
 209                lov_pool_putref(pool);
 210                return NULL;
 211        }
 212
 213        OBD_ALLOC_PTR(iter);
 214        if (!iter)
 215                return ERR_PTR(-ENOMEM);
 216        iter->magic = POOL_IT_MAGIC;
 217        iter->pool = pool;
 218        iter->idx = 0;
 219
 220        /* we use seq_file private field to memorized iterator so
 221         * we can free it at stop() */
 222        /* /!\ do not forget to restore it to pool before freeing it */
 223        s->private = iter;
 224        if (*pos > 0) {
 225                loff_t i;
 226                void *ptr;
 227
 228                i = 0;
 229                do {
 230                     ptr = pool_proc_next(s, &iter, &i);
 231                } while ((i < *pos) && (ptr != NULL));
 232                return ptr;
 233        }
 234        return iter;
 235}
 236
 237static void pool_proc_stop(struct seq_file *s, void *v)
 238{
 239        struct pool_iterator *iter = (struct pool_iterator *)s->private;
 240
 241        /* in some cases stop() method is called 2 times, without
 242         * calling start() method (see seq_read() from fs/seq_file.c)
 243         * we have to free only if s->private is an iterator */
 244        if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
 245                /* we restore s->private so next call to pool_proc_start()
 246                 * will work */
 247                s->private = iter->pool;
 248                lov_pool_putref(iter->pool);
 249                OBD_FREE_PTR(iter);
 250        }
 251        return;
 252}
 253
 254static int pool_proc_show(struct seq_file *s, void *v)
 255{
 256        struct pool_iterator *iter = (struct pool_iterator *)v;
 257        struct lov_tgt_desc *tgt;
 258
 259        LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
 260        LASSERT(iter->pool != NULL);
 261        LASSERT(iter->idx <= pool_tgt_count(iter->pool));
 262
 263        down_read(&pool_tgt_rw_sem(iter->pool));
 264        tgt = pool_tgt(iter->pool, iter->idx);
 265        up_read(&pool_tgt_rw_sem(iter->pool));
 266        if (tgt)
 267                seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
 268
 269        return 0;
 270}
 271
 272static struct seq_operations pool_proc_ops = {
 273        .start    = pool_proc_start,
 274        .next      = pool_proc_next,
 275        .stop      = pool_proc_stop,
 276        .show      = pool_proc_show,
 277};
 278
 279static int pool_proc_open(struct inode *inode, struct file *file)
 280{
 281        int rc;
 282
 283        rc = seq_open(file, &pool_proc_ops);
 284        if (!rc) {
 285                struct seq_file *s = file->private_data;
 286                s->private = PDE_DATA(inode);
 287        }
 288        return rc;
 289}
 290
 291static struct file_operations pool_proc_operations = {
 292        .open      = pool_proc_open,
 293        .read      = seq_read,
 294        .llseek  = seq_lseek,
 295        .release        = seq_release,
 296};
 297#endif /* CONFIG_PROC_FS */
 298
 299void lov_dump_pool(int level, struct pool_desc *pool)
 300{
 301        int i;
 302
 303        lov_pool_getref(pool);
 304
 305        CDEBUG(level, "pool "LOV_POOLNAMEF" has %d members\n",
 306               pool->pool_name, pool->pool_obds.op_count);
 307        down_read(&pool_tgt_rw_sem(pool));
 308
 309        for (i = 0; i < pool_tgt_count(pool) ; i++) {
 310                if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
 311                        continue;
 312                CDEBUG(level, "pool "LOV_POOLNAMEF"[%d] = %s\n",
 313                       pool->pool_name, i,
 314                       obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
 315        }
 316
 317        up_read(&pool_tgt_rw_sem(pool));
 318        lov_pool_putref(pool);
 319}
 320
 321#define LOV_POOL_INIT_COUNT 2
 322int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
 323{
 324        if (count == 0)
 325                count = LOV_POOL_INIT_COUNT;
 326        op->op_array = NULL;
 327        op->op_count = 0;
 328        init_rwsem(&op->op_rw_sem);
 329        op->op_size = count;
 330        OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
 331        if (op->op_array == NULL) {
 332                op->op_size = 0;
 333                return -ENOMEM;
 334        }
 335        return 0;
 336}
 337
 338/* Caller must hold write op_rwlock */
 339int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
 340{
 341        __u32 *new;
 342        int new_size;
 343
 344        LASSERT(min_count != 0);
 345
 346        if (op->op_count < op->op_size)
 347                return 0;
 348
 349        new_size = max(min_count, 2 * op->op_size);
 350        OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
 351        if (new == NULL)
 352                return -ENOMEM;
 353
 354        /* copy old array to new one */
 355        memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
 356        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
 357        op->op_array = new;
 358        op->op_size = new_size;
 359        return 0;
 360}
 361
 362int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
 363{
 364        int rc = 0, i;
 365
 366        down_write(&op->op_rw_sem);
 367
 368        rc = lov_ost_pool_extend(op, min_count);
 369        if (rc)
 370                goto out;
 371
 372        /* search ost in pool array */
 373        for (i = 0; i < op->op_count; i++) {
 374                if (op->op_array[i] == idx) {
 375                        rc = -EEXIST;
 376                        goto out;
 377                }
 378        }
 379        /* ost not found we add it */
 380        op->op_array[op->op_count] = idx;
 381        op->op_count++;
 382out:
 383        up_write(&op->op_rw_sem);
 384        return rc;
 385}
 386
 387int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
 388{
 389        int i;
 390
 391        down_write(&op->op_rw_sem);
 392
 393        for (i = 0; i < op->op_count; i++) {
 394                if (op->op_array[i] == idx) {
 395                        memmove(&op->op_array[i], &op->op_array[i + 1],
 396                                (op->op_count - i - 1) * sizeof(op->op_array[0]));
 397                        op->op_count--;
 398                        up_write(&op->op_rw_sem);
 399                        return 0;
 400                }
 401        }
 402
 403        up_write(&op->op_rw_sem);
 404        return -EINVAL;
 405}
 406
 407int lov_ost_pool_free(struct ost_pool *op)
 408{
 409        if (op->op_size == 0)
 410                return 0;
 411
 412        down_write(&op->op_rw_sem);
 413
 414        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
 415        op->op_array = NULL;
 416        op->op_count = 0;
 417        op->op_size = 0;
 418
 419        up_write(&op->op_rw_sem);
 420        return 0;
 421}
 422
 423
 424int lov_pool_new(struct obd_device *obd, char *poolname)
 425{
 426        struct lov_obd *lov;
 427        struct pool_desc *new_pool;
 428        int rc;
 429
 430        lov = &(obd->u.lov);
 431
 432        if (strlen(poolname) > LOV_MAXPOOLNAME)
 433                return -ENAMETOOLONG;
 434
 435        OBD_ALLOC_PTR(new_pool);
 436        if (new_pool == NULL)
 437                return -ENOMEM;
 438
 439        strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME);
 440        new_pool->pool_name[LOV_MAXPOOLNAME] = '\0';
 441        new_pool->pool_lobd = obd;
 442        /* ref count init to 1 because when created a pool is always used
 443         * up to deletion
 444         */
 445        atomic_set(&new_pool->pool_refcount, 1);
 446        rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
 447        if (rc)
 448                goto out_err;
 449
 450        memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
 451        rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
 452        if (rc)
 453                goto out_free_pool_obds;
 454
 455        INIT_HLIST_NODE(&new_pool->pool_hash);
 456
 457#if defined (CONFIG_PROC_FS)
 458        /* we need this assert seq_file is not implemented for liblustre */
 459        /* get ref for /proc file */
 460        lov_pool_getref(new_pool);
 461        new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
 462                                                       poolname, new_pool,
 463                                                       &pool_proc_operations);
 464        if (IS_ERR(new_pool->pool_proc_entry)) {
 465                CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
 466                new_pool->pool_proc_entry = NULL;
 467                lov_pool_putref(new_pool);
 468        }
 469        CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool, new_pool->pool_proc_entry);
 470#endif
 471
 472        spin_lock(&obd->obd_dev_lock);
 473        list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
 474        lov->lov_pool_count++;
 475        spin_unlock(&obd->obd_dev_lock);
 476
 477        /* add to find only when it fully ready  */
 478        rc = cfs_hash_add_unique(lov->lov_pools_hash_body, poolname,
 479                                 &new_pool->pool_hash);
 480        if (rc) {
 481                rc = -EEXIST;
 482                goto out_err;
 483        }
 484
 485        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
 486               poolname, lov->lov_pool_count);
 487
 488        return 0;
 489
 490out_err:
 491        spin_lock(&obd->obd_dev_lock);
 492        list_del_init(&new_pool->pool_list);
 493        lov->lov_pool_count--;
 494        spin_unlock(&obd->obd_dev_lock);
 495
 496        lprocfs_remove(&new_pool->pool_proc_entry);
 497
 498        lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
 499out_free_pool_obds:
 500        lov_ost_pool_free(&new_pool->pool_obds);
 501        OBD_FREE_PTR(new_pool);
 502        return rc;
 503}
 504
 505int lov_pool_del(struct obd_device *obd, char *poolname)
 506{
 507        struct lov_obd *lov;
 508        struct pool_desc *pool;
 509
 510        lov = &(obd->u.lov);
 511
 512        /* lookup and kill hash reference */
 513        pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
 514        if (pool == NULL)
 515                return -ENOENT;
 516
 517        if (pool->pool_proc_entry != NULL) {
 518                CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
 519                lprocfs_remove(&pool->pool_proc_entry);
 520                lov_pool_putref(pool);
 521        }
 522
 523        spin_lock(&obd->obd_dev_lock);
 524        list_del_init(&pool->pool_list);
 525        lov->lov_pool_count--;
 526        spin_unlock(&obd->obd_dev_lock);
 527
 528        /* release last reference */
 529        lov_pool_putref(pool);
 530
 531        return 0;
 532}
 533
 534
 535int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 536{
 537        struct obd_uuid ost_uuid;
 538        struct lov_obd *lov;
 539        struct pool_desc *pool;
 540        unsigned int lov_idx;
 541        int rc;
 542
 543        lov = &(obd->u.lov);
 544
 545        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
 546        if (pool == NULL)
 547                return -ENOENT;
 548
 549        obd_str2uuid(&ost_uuid, ostname);
 550
 551
 552        /* search ost in lov array */
 553        obd_getref(obd);
 554        for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
 555                if (!lov->lov_tgts[lov_idx])
 556                        continue;
 557                if (obd_uuid_equals(&ost_uuid,
 558                                    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
 559                        break;
 560        }
 561        /* test if ost found in lov */
 562        if (lov_idx == lov->desc.ld_tgt_count) {
 563                rc = -EINVAL;
 564                goto out;
 565        }
 566
 567        rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
 568        if (rc)
 569                goto out;
 570
 571        pool->pool_rr.lqr_dirty = 1;
 572
 573        CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
 574               ostname, poolname,  pool_tgt_count(pool));
 575
 576out:
 577        obd_putref(obd);
 578        lov_pool_putref(pool);
 579        return rc;
 580}
 581
 582int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 583{
 584        struct obd_uuid ost_uuid;
 585        struct lov_obd *lov;
 586        struct pool_desc *pool;
 587        unsigned int lov_idx;
 588        int rc = 0;
 589
 590        lov = &(obd->u.lov);
 591
 592        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
 593        if (pool == NULL)
 594                return -ENOENT;
 595
 596        obd_str2uuid(&ost_uuid, ostname);
 597
 598        obd_getref(obd);
 599        /* search ost in lov array, to get index */
 600        for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
 601                if (!lov->lov_tgts[lov_idx])
 602                        continue;
 603
 604                if (obd_uuid_equals(&ost_uuid,
 605                                    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
 606                        break;
 607        }
 608
 609        /* test if ost found in lov */
 610        if (lov_idx == lov->desc.ld_tgt_count) {
 611                rc = -EINVAL;
 612                goto out;
 613        }
 614
 615        lov_ost_pool_remove(&pool->pool_obds, lov_idx);
 616
 617        pool->pool_rr.lqr_dirty = 1;
 618
 619        CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
 620               poolname);
 621
 622out:
 623        obd_putref(obd);
 624        lov_pool_putref(pool);
 625        return rc;
 626}
 627
 628int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
 629{
 630        int i, rc;
 631
 632        /* caller may no have a ref on pool if it got the pool
 633         * without calling lov_find_pool() (e.g. go through the lov pool
 634         * list)
 635         */
 636        lov_pool_getref(pool);
 637
 638        down_read(&pool_tgt_rw_sem(pool));
 639
 640        for (i = 0; i < pool_tgt_count(pool); i++) {
 641                if (pool_tgt_array(pool)[i] == idx) {
 642                        rc = 0;
 643                        goto out;
 644                }
 645        }
 646        rc = -ENOENT;
 647out:
 648        up_read(&pool_tgt_rw_sem(pool));
 649
 650        lov_pool_putref(pool);
 651        return rc;
 652}
 653
 654struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
 655{
 656        struct pool_desc *pool;
 657
 658        pool = NULL;
 659        if (poolname[0] != '\0') {
 660                pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
 661                if (pool == NULL)
 662                        CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
 663                              poolname);
 664                if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
 665                        CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
 666                               poolname);
 667                        /* pool is ignored, so we remove ref on it */
 668                        lov_pool_putref(pool);
 669                        pool = NULL;
 670                }
 671        }
 672        return pool;
 673}
 674