linux/drivers/md/dm-log-userspace-base.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2006-2009 Red Hat, Inc.
   3 *
   4 * This file is released under the LGPL.
   5 */
   6
   7#include <linux/bio.h>
   8#include <linux/dm-dirty-log.h>
   9#include <linux/device-mapper.h>
  10#include <linux/dm-log-userspace.h>
  11
  12#include "dm-log-userspace-transfer.h"
  13
/*
 * One queued mark/clear request.  Entries are added to log_c.flush_list
 * by userspace_mark_region()/userspace_clear_region() and are sent to the
 * userspace log server, then freed, by userspace_flush().
 */
struct flush_entry {
        int type;               /* DM_ULOG_MARK_REGION or DM_ULOG_CLEAR_REGION */
        region_t region;        /* region the request applies to */
        struct list_head list;  /* link in log_c.flush_list */
};
  19
/*
 * Per-log context, allocated in userspace_ctr() and stored in
 * dm_dirty_log.context.
 */
struct log_c {
        struct dm_target *ti;           /* target that owns this log */
        uint32_t region_size;           /* fetched once from the server in the ctr */
        region_t region_count;          /* ti->len divided by region_size, rounded up */
        uint64_t luid;                  /* local unique id: the address of this struct */
        char uuid[DM_UUID_LEN];         /* first constructor argument */

        char *usr_argv_str;             /* constructor string sent with DM_ULOG_CTR */
        uint32_t usr_argc;              /* argument count reported in the table status */

        /*
         * in_sync_hint gets set when doing is_remote_recovering.  It
         * represents the first region that needs recovery.  IOW, the
         * first zero bit of sync_bits.  This can be useful to limit
         * traffic for calls like is_remote_recovering and get_resync_work,
         * but take care when using it for anything else.
         */
        uint64_t in_sync_hint;

        spinlock_t flush_lock;          /* protects flush_list */
        struct list_head flush_list;  /* only for clear and mark requests */
};
  42
/*
 * Module-wide pool of struct flush_entry; created in
 * userspace_dirty_log_init() and destroyed in userspace_dirty_log_exit().
 */
static mempool_t *flush_entry_pool;
  44
  45static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
  46{
  47        return kmalloc(sizeof(struct flush_entry), gfp_mask);
  48}
  49
  50static void flush_entry_free(void *element, void *pool_data)
  51{
  52        kfree(element);
  53}
  54
  55static int userspace_do_request(struct log_c *lc, const char *uuid,
  56                                int request_type, char *data, size_t data_size,
  57                                char *rdata, size_t *rdata_size)
  58{
  59        int r;
  60
  61        /*
  62         * If the server isn't there, -ESRCH is returned,
  63         * and we must keep trying until the server is
  64         * restored.
  65         */
  66retry:
  67        r = dm_consult_userspace(uuid, lc->luid, request_type, data,
  68                                 data_size, rdata, rdata_size);
  69
  70        if (r != -ESRCH)
  71                return r;
  72
  73        DMERR(" Userspace log server not found.");
  74        while (1) {
  75                set_current_state(TASK_INTERRUPTIBLE);
  76                schedule_timeout(2*HZ);
  77                DMWARN("Attempting to contact userspace log server...");
  78                r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
  79                                         lc->usr_argv_str,
  80                                         strlen(lc->usr_argv_str) + 1,
  81                                         NULL, NULL);
  82                if (!r)
  83                        break;
  84        }
  85        DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  86        r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
  87                                 0, NULL, NULL);
  88        if (!r)
  89                goto retry;
  90
  91        DMERR("Error trying to resume userspace log: %d", r);
  92
  93        return -ESRCH;
  94}
  95
  96static int build_constructor_string(struct dm_target *ti,
  97                                    unsigned argc, char **argv,
  98                                    char **ctr_str)
  99{
 100        int i, str_size;
 101        char *str = NULL;
 102
 103        *ctr_str = NULL;
 104
 105        for (i = 0, str_size = 0; i < argc; i++)
 106                str_size += strlen(argv[i]) + 1; /* +1 for space between args */
 107
 108        str_size += 20; /* Max number of chars in a printed u64 number */
 109
 110        str = kzalloc(str_size, GFP_KERNEL);
 111        if (!str) {
 112                DMWARN("Unable to allocate memory for constructor string");
 113                return -ENOMEM;
 114        }
 115
 116        str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
 117        for (i = 0; i < argc; i++)
 118                str_size += sprintf(str + str_size, " %s", argv[i]);
 119
 120        *ctr_str = str;
 121        return str_size;
 122}
 123
 124/*
 125 * userspace_ctr
 126 *
 127 * argv contains:
 128 *      <UUID> <other args>
 129 * Where 'other args' is the userspace implementation specific log
 130 * arguments.  An example might be:
 131 *      <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
 132 *
 133 * So, this module will strip off the <UUID> for identification purposes
 134 * when communicating with userspace about a log; but will pass on everything
 135 * else.
 136 */
 137static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 138                         unsigned argc, char **argv)
 139{
 140        int r = 0;
 141        int str_size;
 142        char *ctr_str = NULL;
 143        struct log_c *lc = NULL;
 144        uint64_t rdata;
 145        size_t rdata_size = sizeof(rdata);
 146
 147        if (argc < 3) {
 148                DMWARN("Too few arguments to userspace dirty log");
 149                return -EINVAL;
 150        }
 151
 152        lc = kmalloc(sizeof(*lc), GFP_KERNEL);
 153        if (!lc) {
 154                DMWARN("Unable to allocate userspace log context.");
 155                return -ENOMEM;
 156        }
 157
 158        /* The ptr value is sufficient for local unique id */
 159        lc->luid = (unsigned long)lc;
 160
 161        lc->ti = ti;
 162
 163        if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
 164                DMWARN("UUID argument too long.");
 165                kfree(lc);
 166                return -EINVAL;
 167        }
 168
 169        strncpy(lc->uuid, argv[0], DM_UUID_LEN);
 170        spin_lock_init(&lc->flush_lock);
 171        INIT_LIST_HEAD(&lc->flush_list);
 172
 173        str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
 174        if (str_size < 0) {
 175                kfree(lc);
 176                return str_size;
 177        }
 178
 179        /* Send table string */
 180        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
 181                                 ctr_str, str_size, NULL, NULL);
 182
 183        if (r == -ESRCH) {
 184                DMERR("Userspace log server not found");
 185                goto out;
 186        }
 187
 188        /* Since the region size does not change, get it now */
 189        rdata_size = sizeof(rdata);
 190        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
 191                                 NULL, 0, (char *)&rdata, &rdata_size);
 192
 193        if (r) {
 194                DMERR("Failed to get region size of dirty log");
 195                goto out;
 196        }
 197
 198        lc->region_size = (uint32_t)rdata;
 199        lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
 200
 201out:
 202        if (r) {
 203                kfree(lc);
 204                kfree(ctr_str);
 205        } else {
 206                lc->usr_argv_str = ctr_str;
 207                lc->usr_argc = argc;
 208                log->context = lc;
 209        }
 210
 211        return r;
 212}
 213
 214static void userspace_dtr(struct dm_dirty_log *log)
 215{
 216        int r;
 217        struct log_c *lc = log->context;
 218
 219        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
 220                                 NULL, 0,
 221                                 NULL, NULL);
 222
 223        kfree(lc->usr_argv_str);
 224        kfree(lc);
 225
 226        return;
 227}
 228
 229static int userspace_presuspend(struct dm_dirty_log *log)
 230{
 231        int r;
 232        struct log_c *lc = log->context;
 233
 234        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
 235                                 NULL, 0,
 236                                 NULL, NULL);
 237
 238        return r;
 239}
 240
 241static int userspace_postsuspend(struct dm_dirty_log *log)
 242{
 243        int r;
 244        struct log_c *lc = log->context;
 245
 246        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
 247                                 NULL, 0,
 248                                 NULL, NULL);
 249
 250        return r;
 251}
 252
 253static int userspace_resume(struct dm_dirty_log *log)
 254{
 255        int r;
 256        struct log_c *lc = log->context;
 257
 258        lc->in_sync_hint = 0;
 259        r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
 260                                 NULL, 0,
 261                                 NULL, NULL);
 262
 263        return r;
 264}
 265
 266static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
 267{
 268        struct log_c *lc = log->context;
 269
 270        return lc->region_size;
 271}
 272
 273/*
 274 * userspace_is_clean
 275 *
 276 * Check whether a region is clean.  If there is any sort of
 277 * failure when consulting the server, we return not clean.
 278 *
 279 * Returns: 1 if clean, 0 otherwise
 280 */
 281static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
 282{
 283        int r;
 284        uint64_t region64 = (uint64_t)region;
 285        int64_t is_clean;
 286        size_t rdata_size;
 287        struct log_c *lc = log->context;
 288
 289        rdata_size = sizeof(is_clean);
 290        r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
 291                                 (char *)&region64, sizeof(region64),
 292                                 (char *)&is_clean, &rdata_size);
 293
 294        return (r) ? 0 : (int)is_clean;
 295}
 296
 297/*
 298 * userspace_in_sync
 299 *
 300 * Check if the region is in-sync.  If there is any sort
 301 * of failure when consulting the server, we assume that
 302 * the region is not in sync.
 303 *
 304 * If 'can_block' is set, return immediately
 305 *
 306 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
 307 */
 308static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
 309                             int can_block)
 310{
 311        int r;
 312        uint64_t region64 = region;
 313        int64_t in_sync;
 314        size_t rdata_size;
 315        struct log_c *lc = log->context;
 316
 317        /*
 318         * We can never respond directly - even if in_sync_hint is
 319         * set.  This is because another machine could see a device
 320         * failure and mark the region out-of-sync.  If we don't go
 321         * to userspace to ask, we might think the region is in-sync
 322         * and allow a read to pick up data that is stale.  (This is
 323         * very unlikely if a device actually fails; but it is very
 324         * likely if a connection to one device from one machine fails.)
 325         *
 326         * There still might be a problem if the mirror caches the region
 327         * state as in-sync... but then this call would not be made.  So,
 328         * that is a mirror problem.
 329         */
 330        if (!can_block)
 331                return -EWOULDBLOCK;
 332
 333        rdata_size = sizeof(in_sync);
 334        r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
 335                                 (char *)&region64, sizeof(region64),
 336                                 (char *)&in_sync, &rdata_size);
 337        return (r) ? 0 : (int)in_sync;
 338}
 339
 340/*
 341 * userspace_flush
 342 *
 343 * This function is ok to block.
 344 * The flush happens in two stages.  First, it sends all
 345 * clear/mark requests that are on the list.  Then it
 346 * tells the server to commit them.  This gives the
 347 * server a chance to optimise the commit, instead of
 348 * doing it for every request.
 349 *
 350 * Additionally, we could implement another thread that
 351 * sends the requests up to the server - reducing the
 352 * load on flush.  Then the flush would have less in
 353 * the list and be responsible for the finishing commit.
 354 *
 355 * Returns: 0 on success, < 0 on failure
 356 */
 357static int userspace_flush(struct dm_dirty_log *log)
 358{
 359        int r = 0;
 360        unsigned long flags;
 361        struct log_c *lc = log->context;
 362        LIST_HEAD(flush_list);
 363        struct flush_entry *fe, *tmp_fe;
 364
 365        spin_lock_irqsave(&lc->flush_lock, flags);
 366        list_splice_init(&lc->flush_list, &flush_list);
 367        spin_unlock_irqrestore(&lc->flush_lock, flags);
 368
 369        if (list_empty(&flush_list))
 370                return 0;
 371
 372        /*
 373         * FIXME: Count up requests, group request types,
 374         * allocate memory to stick all requests in and
 375         * send to server in one go.  Failing the allocation,
 376         * do it one by one.
 377         */
 378
 379        list_for_each_entry(fe, &flush_list, list) {
 380                r = userspace_do_request(lc, lc->uuid, fe->type,
 381                                         (char *)&fe->region,
 382                                         sizeof(fe->region),
 383                                         NULL, NULL);
 384                if (r)
 385                        goto fail;
 386        }
 387
 388        r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
 389                                 NULL, 0, NULL, NULL);
 390
 391fail:
 392        /*
 393         * We can safely remove these entries, even if failure.
 394         * Calling code will receive an error and will know that
 395         * the log facility has failed.
 396         */
 397        list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
 398                list_del(&fe->list);
 399                mempool_free(fe, flush_entry_pool);
 400        }
 401
 402        if (r)
 403                dm_table_event(lc->ti->table);
 404
 405        return r;
 406}
 407
 408/*
 409 * userspace_mark_region
 410 *
 411 * This function should avoid blocking unless absolutely required.
 412 * (Memory allocation is valid for blocking.)
 413 */
 414static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
 415{
 416        unsigned long flags;
 417        struct log_c *lc = log->context;
 418        struct flush_entry *fe;
 419
 420        /* Wait for an allocation, but _never_ fail */
 421        fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
 422        BUG_ON(!fe);
 423
 424        spin_lock_irqsave(&lc->flush_lock, flags);
 425        fe->type = DM_ULOG_MARK_REGION;
 426        fe->region = region;
 427        list_add(&fe->list, &lc->flush_list);
 428        spin_unlock_irqrestore(&lc->flush_lock, flags);
 429
 430        return;
 431}
 432
 433/*
 434 * userspace_clear_region
 435 *
 436 * This function must not block.
 437 * So, the alloc can't block.  In the worst case, it is ok to
 438 * fail.  It would simply mean we can't clear the region.
 439 * Does nothing to current sync context, but does mean
 440 * the region will be re-sync'ed on a reload of the mirror
 441 * even though it is in-sync.
 442 */
 443static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
 444{
 445        unsigned long flags;
 446        struct log_c *lc = log->context;
 447        struct flush_entry *fe;
 448
 449        /*
 450         * If we fail to allocate, we skip the clearing of
 451         * the region.  This doesn't hurt us in any way, except
 452         * to cause the region to be resync'ed when the
 453         * device is activated next time.
 454         */
 455        fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
 456        if (!fe) {
 457                DMERR("Failed to allocate memory to clear region.");
 458                return;
 459        }
 460
 461        spin_lock_irqsave(&lc->flush_lock, flags);
 462        fe->type = DM_ULOG_CLEAR_REGION;
 463        fe->region = region;
 464        list_add(&fe->list, &lc->flush_list);
 465        spin_unlock_irqrestore(&lc->flush_lock, flags);
 466
 467        return;
 468}
 469
 470/*
 471 * userspace_get_resync_work
 472 *
 473 * Get a region that needs recovery.  It is valid to return
 474 * an error for this function.
 475 *
 476 * Returns: 1 if region filled, 0 if no work, <0 on error
 477 */
 478static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
 479{
 480        int r;
 481        size_t rdata_size;
 482        struct log_c *lc = log->context;
 483        struct {
 484                int64_t i; /* 64-bit for mix arch compatibility */
 485                region_t r;
 486        } pkg;
 487
 488        if (lc->in_sync_hint >= lc->region_count)
 489                return 0;
 490
 491        rdata_size = sizeof(pkg);
 492        r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
 493                                 NULL, 0,
 494                                 (char *)&pkg, &rdata_size);
 495
 496        *region = pkg.r;
 497        return (r) ? r : (int)pkg.i;
 498}
 499
 500/*
 501 * userspace_set_region_sync
 502 *
 503 * Set the sync status of a given region.  This function
 504 * must not fail.
 505 */
 506static void userspace_set_region_sync(struct dm_dirty_log *log,
 507                                      region_t region, int in_sync)
 508{
 509        int r;
 510        struct log_c *lc = log->context;
 511        struct {
 512                region_t r;
 513                int64_t i;
 514        } pkg;
 515
 516        pkg.r = region;
 517        pkg.i = (int64_t)in_sync;
 518
 519        r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
 520                                 (char *)&pkg, sizeof(pkg),
 521                                 NULL, NULL);
 522
 523        /*
 524         * It would be nice to be able to report failures.
 525         * However, it is easy emough to detect and resolve.
 526         */
 527        return;
 528}
 529
 530/*
 531 * userspace_get_sync_count
 532 *
 533 * If there is any sort of failure when consulting the server,
 534 * we assume that the sync count is zero.
 535 *
 536 * Returns: sync count on success, 0 on failure
 537 */
 538static region_t userspace_get_sync_count(struct dm_dirty_log *log)
 539{
 540        int r;
 541        size_t rdata_size;
 542        uint64_t sync_count;
 543        struct log_c *lc = log->context;
 544
 545        rdata_size = sizeof(sync_count);
 546        r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
 547                                 NULL, 0,
 548                                 (char *)&sync_count, &rdata_size);
 549
 550        if (r)
 551                return 0;
 552
 553        if (sync_count >= lc->region_count)
 554                lc->in_sync_hint = lc->region_count;
 555
 556        return (region_t)sync_count;
 557}
 558
 559/*
 560 * userspace_status
 561 *
 562 * Returns: amount of space consumed
 563 */
 564static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 565                            char *result, unsigned maxlen)
 566{
 567        int r = 0;
 568        char *table_args;
 569        size_t sz = (size_t)maxlen;
 570        struct log_c *lc = log->context;
 571
 572        switch (status_type) {
 573        case STATUSTYPE_INFO:
 574                r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
 575                                         NULL, 0,
 576                                         result, &sz);
 577
 578                if (r) {
 579                        sz = 0;
 580                        DMEMIT("%s 1 COM_FAILURE", log->type->name);
 581                }
 582                break;
 583        case STATUSTYPE_TABLE:
 584                sz = 0;
 585                table_args = strchr(lc->usr_argv_str, ' ');
 586                BUG_ON(!table_args); /* There will always be a ' ' */
 587                table_args++;
 588
 589                DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
 590                       lc->uuid, table_args);
 591                break;
 592        }
 593        return (r) ? 0 : (int)sz;
 594}
 595
 596/*
 597 * userspace_is_remote_recovering
 598 *
 599 * Returns: 1 if region recovering, 0 otherwise
 600 */
 601static int userspace_is_remote_recovering(struct dm_dirty_log *log,
 602                                          region_t region)
 603{
 604        int r;
 605        uint64_t region64 = region;
 606        struct log_c *lc = log->context;
 607        static unsigned long long limit;
 608        struct {
 609                int64_t is_recovering;
 610                uint64_t in_sync_hint;
 611        } pkg;
 612        size_t rdata_size = sizeof(pkg);
 613
 614        /*
 615         * Once the mirror has been reported to be in-sync,
 616         * it will never again ask for recovery work.  So,
 617         * we can safely say there is not a remote machine
 618         * recovering if the device is in-sync.  (in_sync_hint
 619         * must be reset at resume time.)
 620         */
 621        if (region < lc->in_sync_hint)
 622                return 0;
 623        else if (jiffies < limit)
 624                return 1;
 625
 626        limit = jiffies + (HZ / 4);
 627        r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
 628                                 (char *)&region64, sizeof(region64),
 629                                 (char *)&pkg, &rdata_size);
 630        if (r)
 631                return 1;
 632
 633        lc->in_sync_hint = pkg.in_sync_hint;
 634
 635        return (int)pkg.is_recovering;
 636}
 637
/*
 * Dirty log type "userspace": operation table registered in
 * userspace_dirty_log_init() and unregistered in
 * userspace_dirty_log_exit().
 */
static struct dm_dirty_log_type _userspace_type = {
        .name = "userspace",
        .module = THIS_MODULE,
        .ctr = userspace_ctr,
        .dtr = userspace_dtr,
        .presuspend = userspace_presuspend,
        .postsuspend = userspace_postsuspend,
        .resume = userspace_resume,
        .get_region_size = userspace_get_region_size,
        .is_clean = userspace_is_clean,
        .in_sync = userspace_in_sync,
        .flush = userspace_flush,
        .mark_region = userspace_mark_region,
        .clear_region = userspace_clear_region,
        .get_resync_work = userspace_get_resync_work,
        .set_region_sync = userspace_set_region_sync,
        .get_sync_count = userspace_get_sync_count,
        .status = userspace_status,
        .is_remote_recovering = userspace_is_remote_recovering,
};
 658
 659static int __init userspace_dirty_log_init(void)
 660{
 661        int r = 0;
 662
 663        flush_entry_pool = mempool_create(100, flush_entry_alloc,
 664                                          flush_entry_free, NULL);
 665
 666        if (!flush_entry_pool) {
 667                DMWARN("Unable to create flush_entry_pool:  No memory.");
 668                return -ENOMEM;
 669        }
 670
 671        r = dm_ulog_tfr_init();
 672        if (r) {
 673                DMWARN("Unable to initialize userspace log communications");
 674                mempool_destroy(flush_entry_pool);
 675                return r;
 676        }
 677
 678        r = dm_dirty_log_type_register(&_userspace_type);
 679        if (r) {
 680                DMWARN("Couldn't register userspace dirty log type");
 681                dm_ulog_tfr_exit();
 682                mempool_destroy(flush_entry_pool);
 683                return r;
 684        }
 685
 686        DMINFO("version 1.0.0 loaded");
 687        return 0;
 688}
 689
 690static void __exit userspace_dirty_log_exit(void)
 691{
 692        dm_dirty_log_type_unregister(&_userspace_type);
 693        dm_ulog_tfr_exit();
 694        mempool_destroy(flush_entry_pool);
 695
 696        DMINFO("version 1.0.0 unloaded");
 697        return;
 698}
 699
/* Module entry/exit points and metadata */
module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);

MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
 705MODULE_LICENSE("GPL");
 706