linux/drivers/nvdimm/btt.c
   1/*
   2 * Block Translation Table
   3 * Copyright (c) 2014-2015, Intel Corporation.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms and conditions of the GNU General Public License,
   7 * version 2, as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope it will be useful, but WITHOUT
  10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12 * more details.
  13 */
  14#include <linux/highmem.h>
  15#include <linux/debugfs.h>
  16#include <linux/blkdev.h>
  17#include <linux/module.h>
  18#include <linux/device.h>
  19#include <linux/mutex.h>
  20#include <linux/hdreg.h>
  21#include <linux/genhd.h>
  22#include <linux/sizes.h>
  23#include <linux/ndctl.h>
  24#include <linux/fs.h>
  25#include <linux/nd.h>
  26#include "btt.h"
  27#include "nd.h"
  28
  29enum log_ent_request {
  30        LOG_NEW_ENT = 0,
  31        LOG_OLD_ENT
  32};
  33
  34static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
  35                void *buf, size_t n)
  36{
  37        struct nd_btt *nd_btt = arena->nd_btt;
  38        struct nd_namespace_common *ndns = nd_btt->ndns;
  39
  40        /* arena offsets are 4K from the base of the device */
  41        offset += SZ_4K;
  42        return nvdimm_read_bytes(ndns, offset, buf, n);
  43}
  44
  45static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
  46                void *buf, size_t n)
  47{
  48        struct nd_btt *nd_btt = arena->nd_btt;
  49        struct nd_namespace_common *ndns = nd_btt->ndns;
  50
  51        /* arena offsets are 4K from the base of the device */
  52        offset += SZ_4K;
  53        return nvdimm_write_bytes(ndns, offset, buf, n);
  54}
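
/*
 * All BTT I/O in this file goes through the two helpers above, so every
 * arena offset is shifted past the first 4K of the namespace;
 * nvdimm_namespace_attach_btt() correspondingly subtracts SZ_4K from the
 * namespace capacity when computing rawsize.
 */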
  55
  56static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
  57{
  58        int ret;
  59
  60        ret = arena_write_bytes(arena, arena->info2off, super,
  61                        sizeof(struct btt_sb));
  62        if (ret)
  63                return ret;
  64
  65        return arena_write_bytes(arena, arena->infooff, super,
  66                        sizeof(struct btt_sb));
  67}
  68
  69static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
  70{
  71        WARN_ON(!super);
  72        return arena_read_bytes(arena, arena->infooff, super,
  73                        sizeof(struct btt_sb));
  74}
  75
  76/*
  77 * 'raw' version of btt_map write
  78 * Assumptions:
  79 *   mapping is in little-endian
  80 *   mapping contains 'E' and 'Z' flags as desired
  81 */
  82static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
  83{
  84        u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
  85
  86        WARN_ON(lba >= arena->external_nlba);
  87        return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
  88}
  89
  90static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
  91                        u32 z_flag, u32 e_flag)
  92{
  93        u32 ze;
  94        __le32 mapping_le;
  95
  96        /*
  97         * This 'mapping' is supposed to be just the LBA mapping, without
  98         * any flags set, so strip the flag bits.
  99         */
 100        mapping &= MAP_LBA_MASK;
 101
 102        ze = (z_flag << 1) + e_flag;
 103        switch (ze) {
 104        case 0:
 105                /*
  106                 * We want to set neither the Z nor the E flag, and
  107                 * in the actual layout, this means setting the bit
  108                 * positions of both to '1' to indicate a 'normal'
  109                 * map entry
 110                 */
 111                mapping |= MAP_ENT_NORMAL;
 112                break;
 113        case 1:
 114                mapping |= (1 << MAP_ERR_SHIFT);
 115                break;
 116        case 2:
 117                mapping |= (1 << MAP_TRIM_SHIFT);
 118                break;
 119        default:
 120                /*
 121                 * The case where Z and E are both sent in as '1' could be
  122                 * construed as a valid 'normal' case, but we treat it as
  123                 * invalid here to avoid confusion
 124                 */
 125                WARN_ONCE(1, "Invalid use of Z and E flags\n");
 126                return -EIO;
 127        }
 128
 129        mapping_le = cpu_to_le32(mapping);
 130        return __btt_map_write(arena, lba, mapping_le);
 131}
 132
 133static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
 134                        int *trim, int *error)
 135{
 136        int ret;
 137        __le32 in;
 138        u32 raw_mapping, postmap, ze, z_flag, e_flag;
 139        u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
 140
 141        WARN_ON(lba >= arena->external_nlba);
 142
 143        ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
 144        if (ret)
 145                return ret;
 146
 147        raw_mapping = le32_to_cpu(in);
 148
 149        z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
 150        e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
 151        ze = (z_flag << 1) + e_flag;
 152        postmap = raw_mapping & MAP_LBA_MASK;
 153
 154        /* Reuse the {z,e}_flag variables for *trim and *error */
 155        z_flag = 0;
 156        e_flag = 0;
 157
 158        switch (ze) {
 159        case 0:
 160                /* Initial state. Return postmap = premap */
 161                *mapping = lba;
 162                break;
 163        case 1:
 164                *mapping = postmap;
 165                e_flag = 1;
 166                break;
 167        case 2:
 168                *mapping = postmap;
 169                z_flag = 1;
 170                break;
 171        case 3:
 172                *mapping = postmap;
 173                break;
 174        default:
 175                return -EIO;
 176        }
 177
 178        if (trim)
 179                *trim = z_flag;
 180        if (error)
 181                *error = e_flag;
 182
 183        return ret;
 184}
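
/*
 * Putting the two helpers above together, the on-media 32-bit map entry
 * looks roughly like this (the exact bit positions are given by
 * MAP_TRIM_SHIFT, MAP_ERR_SHIFT and MAP_ENT_NORMAL in btt.h):
 *
 *   Z bit  E bit  meaning on read
 *   -----  -----  ------------------------------------------------------
 *     0      0    initial state, identity mapping (postmap == premap)
 *     0      1    low bits hold postmap, block has a media error
 *     1      0    low bits hold postmap, block was zeroed/trimmed
 *     1      1    'normal' entry, low bits hold postmap
 *
 * For example, btt_map_write(arena, 5, 9, 0, 0) stores (9 | MAP_ENT_NORMAL)
 * in map slot 5; a subsequent btt_map_read() of slot 5 then returns a
 * postmap of 9 with neither *trim nor *error set.
 */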
 185
 186static int btt_log_read_pair(struct arena_info *arena, u32 lane,
 187                        struct log_entry *ent)
 188{
 189        WARN_ON(!ent);
 190        return arena_read_bytes(arena,
 191                        arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
 192                        2 * LOG_ENT_SIZE);
 193}
 194
 195static struct dentry *debugfs_root;
 196
 197static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
 198                                int idx)
 199{
 200        char dirname[32];
 201        struct dentry *d;
 202
 203        /* If for some reason, parent bttN was not created, exit */
 204        if (!parent)
 205                return;
 206
 207        snprintf(dirname, 32, "arena%d", idx);
 208        d = debugfs_create_dir(dirname, parent);
 209        if (IS_ERR_OR_NULL(d))
 210                return;
 211        a->debugfs_dir = d;
 212
 213        debugfs_create_x64("size", S_IRUGO, d, &a->size);
 214        debugfs_create_x64("external_lba_start", S_IRUGO, d,
 215                                &a->external_lba_start);
 216        debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba);
 217        debugfs_create_u32("internal_lbasize", S_IRUGO, d,
 218                                &a->internal_lbasize);
 219        debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba);
 220        debugfs_create_u32("external_lbasize", S_IRUGO, d,
 221                                &a->external_lbasize);
 222        debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree);
 223        debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major);
 224        debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor);
 225        debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff);
 226        debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff);
 227        debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff);
 228        debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff);
 229        debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
 230        debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
 231        debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
 232}
 233
 234static void btt_debugfs_init(struct btt *btt)
 235{
 236        int i = 0;
 237        struct arena_info *arena;
 238
 239        btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev),
 240                                                debugfs_root);
 241        if (IS_ERR_OR_NULL(btt->debugfs_dir))
 242                return;
 243
 244        list_for_each_entry(arena, &btt->arena_list, list) {
 245                arena_debugfs_init(arena, btt->debugfs_dir, i);
 246                i++;
 247        }
 248}
 249
  250/*
  251 * This function accepts a pair of log entries and uses their sequence
  252 * numbers to determine which of the two is the 'older' entry. If neither
  253 * slot has been used yet, slot 0's sequence number is initialized and
  254 * slot 0 is reported as the older entry. It returns the index (0 or 1)
  255 * of the older entry, or a negative error code on invalid sequence numbers.
  256 *
  257 * TODO The logic feels a bit kludge-y. make it better..
  258 */
 259static int btt_log_get_old(struct log_entry *ent)
 260{
 261        int old;
 262
 263        /*
  264         * The very first time this lane is used, the entry goes into [0].
  265         * On the next write, the logic below works out to put the new
  266         * entry into [1]
 267         */
 268        if (ent[0].seq == 0) {
 269                ent[0].seq = cpu_to_le32(1);
 270                return 0;
 271        }
 272
 273        if (ent[0].seq == ent[1].seq)
 274                return -EINVAL;
 275        if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
 276                return -EINVAL;
 277
 278        if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
 279                if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
 280                        old = 0;
 281                else
 282                        old = 1;
 283        } else {
 284                if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
 285                        old = 1;
 286                else
 287                        old = 0;
 288        }
 289
 290        return old;
 291}
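
/*
 * A worked example of the ordering above: sequence numbers cycle
 * 1 -> 2 -> 3 -> 1 (see btt_flog_write()), and 0 means the slot has never
 * been written. The older entry of a pair is the one whose number comes
 * earlier in that cycle:
 *
 *   seq pair (1, 2) -> slot 0 is older
 *   seq pair (2, 3) -> slot 0 is older
 *   seq pair (3, 1) -> slot 0 is older (1 follows 3 in the cycle)
 *   seq pair (1, 0) -> slot 1 is older (never written)
 *
 * Equal sequence numbers, or a pair whose sum exceeds 5 (impossible for the
 * 1/2/3 cycle), are treated as corruption and return -EINVAL.
 */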
 292
 293static struct device *to_dev(struct arena_info *arena)
 294{
 295        return &arena->nd_btt->dev;
 296}
 297
 298/*
 299 * This function copies the desired (old/new) log entry into ent if
 300 * it is not NULL. It returns the sub-slot number (0 or 1)
 301 * where the desired log entry was found. Negative return values
 302 * indicate errors.
 303 */
 304static int btt_log_read(struct arena_info *arena, u32 lane,
 305                        struct log_entry *ent, int old_flag)
 306{
 307        int ret;
 308        int old_ent, ret_ent;
 309        struct log_entry log[2];
 310
 311        ret = btt_log_read_pair(arena, lane, log);
 312        if (ret)
 313                return -EIO;
 314
 315        old_ent = btt_log_get_old(log);
 316        if (old_ent < 0 || old_ent > 1) {
 317                dev_info(to_dev(arena),
 318                                "log corruption (%d): lane %d seq [%d, %d]\n",
  319                        old_ent, lane, le32_to_cpu(log[0].seq), le32_to_cpu(log[1].seq));
 320                /* TODO set error state? */
 321                return -EIO;
 322        }
 323
 324        ret_ent = (old_flag ? old_ent : (1 - old_ent));
 325
 326        if (ent != NULL)
 327                memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
 328
 329        return ret_ent;
 330}
 331
 332/*
 333 * This function commits a log entry to media
 334 * It does _not_ prepare the freelist entry for the next write
 335 * btt_flog_write is the wrapper for updating the freelist elements
 336 */
 337static int __btt_log_write(struct arena_info *arena, u32 lane,
 338                        u32 sub, struct log_entry *ent)
 339{
 340        int ret;
 341        /*
 342         * Ignore the padding in log_entry for calculating log_half.
 343         * The entry is 'committed' when we write the sequence number,
 344         * and we want to ensure that that is the last thing written.
 345         * We don't bother writing the padding as that would be extra
 346         * media wear and write amplification
 347         */
 348        unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
 349        u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
 350        void *src = ent;
 351
 352        /* split the 16B write into atomic, durable halves */
 353        ret = arena_write_bytes(arena, ns_off, src, log_half);
 354        if (ret)
 355                return ret;
 356
 357        ns_off += log_half;
 358        src += log_half;
 359        return arena_write_bytes(arena, ns_off, src, log_half);
 360}
 361
 362static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
 363                        struct log_entry *ent)
 364{
 365        int ret;
 366
 367        ret = __btt_log_write(arena, lane, sub, ent);
 368        if (ret)
 369                return ret;
 370
 371        /* prepare the next free entry */
 372        arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
 373        if (++(arena->freelist[lane].seq) == 4)
 374                arena->freelist[lane].seq = 1;
 375        arena->freelist[lane].block = le32_to_cpu(ent->old_map);
 376
 377        return ret;
 378}
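
/*
 * After the flog entry above becomes durable, the lane's free slot is
 * advanced: 'sub' flips between the two log slots of the pair, 'seq' steps
 * through the 1 -> 2 -> 3 -> 1 cycle, and the block that was just unmapped
 * (old_map) becomes this lane's next free block.
 */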
 379
 380/*
 381 * This function initializes the BTT map to the initial state, which is
 382 * all-zeroes, and indicates an identity mapping
 383 */
 384static int btt_map_init(struct arena_info *arena)
 385{
 386        int ret = -EINVAL;
 387        void *zerobuf;
 388        size_t offset = 0;
 389        size_t chunk_size = SZ_2M;
 390        size_t mapsize = arena->logoff - arena->mapoff;
 391
 392        zerobuf = kzalloc(chunk_size, GFP_KERNEL);
 393        if (!zerobuf)
 394                return -ENOMEM;
 395
 396        while (mapsize) {
 397                size_t size = min(mapsize, chunk_size);
 398
 399                ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
 400                                size);
 401                if (ret)
 402                        goto free;
 403
 404                offset += size;
 405                mapsize -= size;
 406                cond_resched();
 407        }
 408
 409 free:
 410        kfree(zerobuf);
 411        return ret;
 412}
 413
 414/*
  415 * This function initializes the BTT log with 'fake' entries that mark
  416 * the initially reserved set of blocks as free
 417 */
 418static int btt_log_init(struct arena_info *arena)
 419{
 420        int ret;
 421        u32 i;
 422        struct log_entry log, zerolog;
 423
 424        memset(&zerolog, 0, sizeof(zerolog));
 425
 426        for (i = 0; i < arena->nfree; i++) {
 427                log.lba = cpu_to_le32(i);
 428                log.old_map = cpu_to_le32(arena->external_nlba + i);
 429                log.new_map = cpu_to_le32(arena->external_nlba + i);
 430                log.seq = cpu_to_le32(LOG_SEQ_INIT);
 431                ret = __btt_log_write(arena, i, 0, &log);
 432                if (ret)
 433                        return ret;
 434                ret = __btt_log_write(arena, i, 1, &zerolog);
 435                if (ret)
 436                        return ret;
 437        }
 438
 439        return 0;
 440}
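
/*
 * Note that the initial entries written above assign lane i the free block
 * external_nlba + i, i.e. one of the nfree blocks that alloc_arena() held
 * back beyond the advertised capacity. old_map == new_map is also what lets
 * btt_freelist_init() recognize such an entry as untouched.
 */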
 441
 442static int btt_freelist_init(struct arena_info *arena)
 443{
 444        int old, new, ret;
 445        u32 i, map_entry;
 446        struct log_entry log_new, log_old;
 447
 448        arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
 449                                        GFP_KERNEL);
 450        if (!arena->freelist)
 451                return -ENOMEM;
 452
 453        for (i = 0; i < arena->nfree; i++) {
 454                old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
 455                if (old < 0)
 456                        return old;
 457
 458                new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
 459                if (new < 0)
 460                        return new;
 461
 462                /* sub points to the next one to be overwritten */
 463                arena->freelist[i].sub = 1 - new;
 464                arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
 465                arena->freelist[i].block = le32_to_cpu(log_new.old_map);
 466
 467                /* This implies a newly created or untouched flog entry */
 468                if (log_new.old_map == log_new.new_map)
 469                        continue;
 470
 471                /* Check if map recovery is needed */
 472                ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
 473                                NULL, NULL);
 474                if (ret)
 475                        return ret;
 476                if ((le32_to_cpu(log_new.new_map) != map_entry) &&
 477                                (le32_to_cpu(log_new.old_map) == map_entry)) {
 478                        /*
 479                         * Last transaction wrote the flog, but wasn't able
 480                         * to complete the map write. So fix up the map.
 481                         */
 482                        ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
 483                                        le32_to_cpu(log_new.new_map), 0, 0);
 484                        if (ret)
 485                                return ret;
 486                }
 487
 488        }
 489
 490        return 0;
 491}
 492
 493static int btt_rtt_init(struct arena_info *arena)
 494{
 495        arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
 496        if (arena->rtt == NULL)
 497                return -ENOMEM;
 498
 499        return 0;
 500}
 501
 502static int btt_maplocks_init(struct arena_info *arena)
 503{
 504        u32 i;
 505
 506        arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock),
 507                                GFP_KERNEL);
 508        if (!arena->map_locks)
 509                return -ENOMEM;
 510
 511        for (i = 0; i < arena->nfree; i++)
 512                spin_lock_init(&arena->map_locks[i].lock);
 513
 514        return 0;
 515}
 516
 517static struct arena_info *alloc_arena(struct btt *btt, size_t size,
 518                                size_t start, size_t arena_off)
 519{
 520        struct arena_info *arena;
 521        u64 logsize, mapsize, datasize;
 522        u64 available = size;
 523
 524        arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL);
 525        if (!arena)
 526                return NULL;
 527        arena->nd_btt = btt->nd_btt;
 528
 529        if (!size)
 530                return arena;
 531
 532        arena->size = size;
 533        arena->external_lba_start = start;
 534        arena->external_lbasize = btt->lbasize;
 535        arena->internal_lbasize = roundup(arena->external_lbasize,
 536                                        INT_LBASIZE_ALIGNMENT);
 537        arena->nfree = BTT_DEFAULT_NFREE;
 538        arena->version_major = 1;
 539        arena->version_minor = 1;
 540
 541        if (available % BTT_PG_SIZE)
 542                available -= (available % BTT_PG_SIZE);
 543
 544        /* Two pages are reserved for the super block and its copy */
 545        available -= 2 * BTT_PG_SIZE;
 546
 547        /* The log takes a fixed amount of space based on nfree */
 548        logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
 549                                BTT_PG_SIZE);
 550        available -= logsize;
 551
 552        /* Calculate optimal split between map and data area */
 553        arena->internal_nlba = div_u64(available - BTT_PG_SIZE,
 554                        arena->internal_lbasize + MAP_ENT_SIZE);
 555        arena->external_nlba = arena->internal_nlba - arena->nfree;
 556
 557        mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE);
 558        datasize = available - mapsize;
 559
 560        /* 'Absolute' values, relative to start of storage space */
 561        arena->infooff = arena_off;
 562        arena->dataoff = arena->infooff + BTT_PG_SIZE;
 563        arena->mapoff = arena->dataoff + datasize;
 564        arena->logoff = arena->mapoff + mapsize;
 565        arena->info2off = arena->logoff + logsize;
 566        return arena;
 567}
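
/*
 * The resulting on-media layout of one arena, as computed above (the
 * offsets are absolute within the BTT area, i.e. arena_off is folded in):
 *
 *   infooff    info block, BTT_PG_SIZE
 *   dataoff    data area: internal_nlba blocks of internal_lbasize bytes
 *   mapoff     map: external_nlba entries of MAP_ENT_SIZE bytes
 *   logoff     log: 2 * nfree entries of LOG_ENT_SIZE bytes
 *   info2off   backup copy of the info block
 *
 * internal_nlba is sized so that the data area and the map fit in whatever
 * remains after the two info blocks and the log (with about a page of slack
 * for the map roundup), and nfree of those blocks are held back from
 * external_nlba to serve as the per-lane free blocks.
 */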
 568
 569static void free_arenas(struct btt *btt)
 570{
 571        struct arena_info *arena, *next;
 572
 573        list_for_each_entry_safe(arena, next, &btt->arena_list, list) {
 574                list_del(&arena->list);
 575                kfree(arena->rtt);
 576                kfree(arena->map_locks);
 577                kfree(arena->freelist);
 578                debugfs_remove_recursive(arena->debugfs_dir);
 579                kfree(arena);
 580        }
 581}
 582
 583/*
  584 * This function parses an existing, valid btt superblock (already read by
  585 * the caller) and populates the corresponding arena_info struct
 586 */
 587static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
 588                                u64 arena_off)
 589{
 590        arena->internal_nlba = le32_to_cpu(super->internal_nlba);
 591        arena->internal_lbasize = le32_to_cpu(super->internal_lbasize);
 592        arena->external_nlba = le32_to_cpu(super->external_nlba);
 593        arena->external_lbasize = le32_to_cpu(super->external_lbasize);
 594        arena->nfree = le32_to_cpu(super->nfree);
 595        arena->version_major = le16_to_cpu(super->version_major);
 596        arena->version_minor = le16_to_cpu(super->version_minor);
 597
 598        arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off +
 599                        le64_to_cpu(super->nextoff));
 600        arena->infooff = arena_off;
 601        arena->dataoff = arena_off + le64_to_cpu(super->dataoff);
 602        arena->mapoff = arena_off + le64_to_cpu(super->mapoff);
 603        arena->logoff = arena_off + le64_to_cpu(super->logoff);
 604        arena->info2off = arena_off + le64_to_cpu(super->info2off);
 605
 606        arena->size = (le64_to_cpu(super->nextoff) > 0)
 607                ? (le64_to_cpu(super->nextoff))
 608                : (arena->info2off - arena->infooff + BTT_PG_SIZE);
 609
 610        arena->flags = le32_to_cpu(super->flags);
 611}
 612
 613static int discover_arenas(struct btt *btt)
 614{
 615        int ret = 0;
 616        struct arena_info *arena;
 617        struct btt_sb *super;
 618        size_t remaining = btt->rawsize;
 619        u64 cur_nlba = 0;
 620        size_t cur_off = 0;
 621        int num_arenas = 0;
 622
 623        super = kzalloc(sizeof(*super), GFP_KERNEL);
 624        if (!super)
 625                return -ENOMEM;
 626
 627        while (remaining) {
 628                /* Alloc memory for arena */
 629                arena = alloc_arena(btt, 0, 0, 0);
 630                if (!arena) {
 631                        ret = -ENOMEM;
 632                        goto out_super;
 633                }
 634
 635                arena->infooff = cur_off;
 636                ret = btt_info_read(arena, super);
 637                if (ret)
 638                        goto out;
 639
 640                if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
 641                        if (remaining == btt->rawsize) {
 642                                btt->init_state = INIT_NOTFOUND;
 643                                dev_info(to_dev(arena), "No existing arenas\n");
 644                                goto out;
 645                        } else {
 646                                dev_info(to_dev(arena),
 647                                                "Found corrupted metadata!\n");
 648                                ret = -ENODEV;
 649                                goto out;
 650                        }
 651                }
 652
 653                arena->external_lba_start = cur_nlba;
 654                parse_arena_meta(arena, super, cur_off);
 655
 656                ret = btt_freelist_init(arena);
 657                if (ret)
 658                        goto out;
 659
 660                ret = btt_rtt_init(arena);
 661                if (ret)
 662                        goto out;
 663
 664                ret = btt_maplocks_init(arena);
 665                if (ret)
 666                        goto out;
 667
 668                list_add_tail(&arena->list, &btt->arena_list);
 669
 670                remaining -= arena->size;
 671                cur_off += arena->size;
 672                cur_nlba += arena->external_nlba;
 673                num_arenas++;
 674
 675                if (arena->nextoff == 0)
 676                        break;
 677        }
 678        btt->num_arenas = num_arenas;
 679        btt->nlba = cur_nlba;
 680        btt->init_state = INIT_READY;
 681
 682        kfree(super);
 683        return ret;
 684
 685 out:
 686        kfree(arena);
 687        free_arenas(btt);
 688 out_super:
 689        kfree(super);
 690        return ret;
 691}
 692
 693static int create_arenas(struct btt *btt)
 694{
 695        size_t remaining = btt->rawsize;
 696        size_t cur_off = 0;
 697
 698        while (remaining) {
 699                struct arena_info *arena;
 700                size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining);
 701
 702                remaining -= arena_size;
 703                if (arena_size < ARENA_MIN_SIZE)
 704                        break;
 705
 706                arena = alloc_arena(btt, arena_size, btt->nlba, cur_off);
 707                if (!arena) {
 708                        free_arenas(btt);
 709                        return -ENOMEM;
 710                }
 711                btt->nlba += arena->external_nlba;
 712                if (remaining >= ARENA_MIN_SIZE)
 713                        arena->nextoff = arena->size;
 714                else
 715                        arena->nextoff = 0;
 716                cur_off += arena_size;
 717                list_add_tail(&arena->list, &btt->arena_list);
 718        }
 719
 720        return 0;
 721}
 722
 723/*
 724 * This function completes arena initialization by writing
 725 * all the metadata.
 726 * It is only called for an uninitialized arena when a write
 727 * to that arena occurs for the first time.
 728 */
 729static int btt_arena_write_layout(struct arena_info *arena)
 730{
 731        int ret;
 732        u64 sum;
 733        struct btt_sb *super;
 734        struct nd_btt *nd_btt = arena->nd_btt;
 735        const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
 736
 737        ret = btt_map_init(arena);
 738        if (ret)
 739                return ret;
 740
 741        ret = btt_log_init(arena);
 742        if (ret)
 743                return ret;
 744
 745        super = kzalloc(sizeof(struct btt_sb), GFP_NOIO);
 746        if (!super)
 747                return -ENOMEM;
 748
 749        strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
 750        memcpy(super->uuid, nd_btt->uuid, 16);
 751        memcpy(super->parent_uuid, parent_uuid, 16);
 752        super->flags = cpu_to_le32(arena->flags);
 753        super->version_major = cpu_to_le16(arena->version_major);
 754        super->version_minor = cpu_to_le16(arena->version_minor);
 755        super->external_lbasize = cpu_to_le32(arena->external_lbasize);
 756        super->external_nlba = cpu_to_le32(arena->external_nlba);
 757        super->internal_lbasize = cpu_to_le32(arena->internal_lbasize);
 758        super->internal_nlba = cpu_to_le32(arena->internal_nlba);
 759        super->nfree = cpu_to_le32(arena->nfree);
 760        super->infosize = cpu_to_le32(sizeof(struct btt_sb));
 761        super->nextoff = cpu_to_le64(arena->nextoff);
 762        /*
 763         * Subtract arena->infooff (arena start) so numbers are relative
 764         * to 'this' arena
 765         */
 766        super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff);
 767        super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff);
 768        super->logoff = cpu_to_le64(arena->logoff - arena->infooff);
 769        super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
 770
 771        super->flags = 0;
 772        sum = nd_sb_checksum((struct nd_gen_sb *) super);
 773        super->checksum = cpu_to_le64(sum);
 774
 775        ret = btt_info_write(arena, super);
 776
 777        kfree(super);
 778        return ret;
 779}
 780
 781/*
 782 * This function completes the initialization for the BTT namespace
 783 * such that it is ready to accept IOs
 784 */
 785static int btt_meta_init(struct btt *btt)
 786{
 787        int ret = 0;
 788        struct arena_info *arena;
 789
 790        mutex_lock(&btt->init_lock);
 791        list_for_each_entry(arena, &btt->arena_list, list) {
 792                ret = btt_arena_write_layout(arena);
 793                if (ret)
 794                        goto unlock;
 795
 796                ret = btt_freelist_init(arena);
 797                if (ret)
 798                        goto unlock;
 799
 800                ret = btt_rtt_init(arena);
 801                if (ret)
 802                        goto unlock;
 803
 804                ret = btt_maplocks_init(arena);
 805                if (ret)
 806                        goto unlock;
 807        }
 808
 809        btt->init_state = INIT_READY;
 810
 811 unlock:
 812        mutex_unlock(&btt->init_lock);
 813        return ret;
 814}
 815
 816static u32 btt_meta_size(struct btt *btt)
 817{
 818        return btt->lbasize - btt->sector_size;
 819}
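
/*
 * For example, assuming a namespace formatted with an extended lbasize of
 * 4104 bytes: sector_size is 4096 (see btt_init()), so 8 bytes per sector
 * remain for the block integrity metadata handled further below.
 */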
 820
 821/*
 822 * This function calculates the arena in which the given LBA lies
 823 * by doing a linear walk. This is acceptable since we expect only
 824 * a few arenas. If we have backing devices that get much larger,
 825 * we can construct a balanced binary tree of arenas at init time
 826 * so that this range search becomes faster.
 827 */
 828static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap,
 829                                struct arena_info **arena)
 830{
 831        struct arena_info *arena_list;
 832        __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size);
 833
 834        list_for_each_entry(arena_list, &btt->arena_list, list) {
 835                if (lba < arena_list->external_nlba) {
 836                        *arena = arena_list;
 837                        *premap = lba;
 838                        return 0;
 839                }
 840                lba -= arena_list->external_nlba;
 841        }
 842
 843        return -EIO;
 844}
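
/*
 * For example, with a 4096B btt->sector_size, block-layer sector 64 (a 512B
 * unit) converts to external LBA 8; if arena 0 advertised only 4 external
 * blocks, the walk above would land in arena 1 with *premap = 4.
 */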
 845
 846/*
 847 * The following (lock_map, unlock_map) are mostly just to improve
 848 * readability, since they index into an array of locks
 849 */
 850static void lock_map(struct arena_info *arena, u32 premap)
 851                __acquires(&arena->map_locks[idx].lock)
 852{
 853        u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
 854
 855        spin_lock(&arena->map_locks[idx].lock);
 856}
 857
 858static void unlock_map(struct arena_info *arena, u32 premap)
 859                __releases(&arena->map_locks[idx].lock)
 860{
 861        u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
 862
 863        spin_unlock(&arena->map_locks[idx].lock);
 864}
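
/*
 * Note the striping above: map entries that share an L1 cacheline hash to
 * the same lock, and the locks are spread across nfree slots, so writers to
 * nearby premap entries serialize on one lock while writers to distant
 * entries proceed in parallel.
 */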
 865
 866static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
 867{
 868        return arena->dataoff + ((u64)lba * arena->internal_lbasize);
 869}
 870
 871static int btt_data_read(struct arena_info *arena, struct page *page,
 872                        unsigned int off, u32 lba, u32 len)
 873{
 874        int ret;
 875        u64 nsoff = to_namespace_offset(arena, lba);
 876        void *mem = kmap_atomic(page);
 877
 878        ret = arena_read_bytes(arena, nsoff, mem + off, len);
 879        kunmap_atomic(mem);
 880
 881        return ret;
 882}
 883
 884static int btt_data_write(struct arena_info *arena, u32 lba,
 885                        struct page *page, unsigned int off, u32 len)
 886{
 887        int ret;
 888        u64 nsoff = to_namespace_offset(arena, lba);
 889        void *mem = kmap_atomic(page);
 890
 891        ret = arena_write_bytes(arena, nsoff, mem + off, len);
 892        kunmap_atomic(mem);
 893
 894        return ret;
 895}
 896
 897static void zero_fill_data(struct page *page, unsigned int off, u32 len)
 898{
 899        void *mem = kmap_atomic(page);
 900
 901        memset(mem + off, 0, len);
 902        kunmap_atomic(mem);
 903}
 904
 905#ifdef CONFIG_BLK_DEV_INTEGRITY
 906static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 907                        struct arena_info *arena, u32 postmap, int rw)
 908{
 909        unsigned int len = btt_meta_size(btt);
 910        u64 meta_nsoff;
 911        int ret = 0;
 912
 913        if (bip == NULL)
 914                return 0;
 915
 916        meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size;
 917
 918        while (len) {
 919                unsigned int cur_len;
 920                struct bio_vec bv;
 921                void *mem;
 922
 923                bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
 924                /*
 925                 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
 926                 * .bv_offset already adjusted for iter->bi_bvec_done, and we
 927                 * can use those directly
 928                 */
 929
 930                cur_len = min(len, bv.bv_len);
 931                mem = kmap_atomic(bv.bv_page);
 932                if (rw)
 933                        ret = arena_write_bytes(arena, meta_nsoff,
 934                                        mem + bv.bv_offset, cur_len);
 935                else
 936                        ret = arena_read_bytes(arena, meta_nsoff,
 937                                        mem + bv.bv_offset, cur_len);
 938
 939                kunmap_atomic(mem);
 940                if (ret)
 941                        return ret;
 942
 943                len -= cur_len;
 944                meta_nsoff += cur_len;
 945                bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
 946        }
 947
 948        return ret;
 949}
 950
 951#else /* CONFIG_BLK_DEV_INTEGRITY */
 952static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 953                        struct arena_info *arena, u32 postmap, int rw)
 954{
 955        return 0;
 956}
 957#endif
 958
 959static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 960                        struct page *page, unsigned int off, sector_t sector,
 961                        unsigned int len)
 962{
 963        int ret = 0;
 964        int t_flag, e_flag;
 965        struct arena_info *arena = NULL;
 966        u32 lane = 0, premap, postmap;
 967
 968        while (len) {
 969                u32 cur_len;
 970
 971                lane = nd_region_acquire_lane(btt->nd_region);
 972
 973                ret = lba_to_arena(btt, sector, &premap, &arena);
 974                if (ret)
 975                        goto out_lane;
 976
 977                cur_len = min(btt->sector_size, len);
 978
 979                ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
 980                if (ret)
 981                        goto out_lane;
 982
 983                /*
 984                 * We loop to make sure that the post map LBA didn't change
 985                 * from under us between writing the RTT and doing the actual
 986                 * read.
 987                 */
 988                while (1) {
 989                        u32 new_map;
 990
 991                        if (t_flag) {
 992                                zero_fill_data(page, off, cur_len);
 993                                goto out_lane;
 994                        }
 995
 996                        if (e_flag) {
 997                                ret = -EIO;
 998                                goto out_lane;
 999                        }
1000
1001                        arena->rtt[lane] = RTT_VALID | postmap;
1002                        /*
 1003                         * Barrier to make sure the verification map_read below
 1004                         * is not reordered before the RTT store above
1005                         */
1006                        barrier();
1007
1008                        ret = btt_map_read(arena, premap, &new_map, &t_flag,
1009                                                &e_flag);
1010                        if (ret)
1011                                goto out_rtt;
1012
1013                        if (postmap == new_map)
1014                                break;
1015
1016                        postmap = new_map;
1017                }
1018
1019                ret = btt_data_read(arena, page, off, postmap, cur_len);
1020                if (ret)
1021                        goto out_rtt;
1022
1023                if (bip) {
1024                        ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
1025                        if (ret)
1026                                goto out_rtt;
1027                }
1028
1029                arena->rtt[lane] = RTT_INVALID;
1030                nd_region_release_lane(btt->nd_region, lane);
1031
1032                len -= cur_len;
1033                off += cur_len;
1034                sector += btt->sector_size >> SECTOR_SHIFT;
1035        }
1036
1037        return 0;
1038
1039 out_rtt:
1040        arena->rtt[lane] = RTT_INVALID;
1041 out_lane:
1042        nd_region_release_lane(btt->nd_region, lane);
1043        return ret;
1044}
1045
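/*
 * Outline of the write path below: each lane owns one free internal block.
 * The new data (and any integrity metadata) is written to that free block
 * first, after waiting for any reader that has advertised the block in the
 * RTT. Only then, under the map lock, is a flog entry committed recording
 * old_map -> new_map, followed by the map update itself. A crash between
 * the flog write and the map write is exactly what btt_freelist_init()
 * detects and repairs on the next startup.
 */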
1046static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
1047                        sector_t sector, struct page *page, unsigned int off,
1048                        unsigned int len)
1049{
1050        int ret = 0;
1051        struct arena_info *arena = NULL;
1052        u32 premap = 0, old_postmap, new_postmap, lane = 0, i;
1053        struct log_entry log;
1054        int sub;
1055
1056        while (len) {
1057                u32 cur_len;
1058
1059                lane = nd_region_acquire_lane(btt->nd_region);
1060
1061                ret = lba_to_arena(btt, sector, &premap, &arena);
1062                if (ret)
1063                        goto out_lane;
1064                cur_len = min(btt->sector_size, len);
1065
1066                if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) {
1067                        ret = -EIO;
1068                        goto out_lane;
1069                }
1070
1071                new_postmap = arena->freelist[lane].block;
1072
1073                /* Wait if the new block is being read from */
1074                for (i = 0; i < arena->nfree; i++)
1075                        while (arena->rtt[i] == (RTT_VALID | new_postmap))
1076                                cpu_relax();
1077
1078
1079                if (new_postmap >= arena->internal_nlba) {
1080                        ret = -EIO;
1081                        goto out_lane;
1082                }
1083
1084                ret = btt_data_write(arena, new_postmap, page, off, cur_len);
1085                if (ret)
1086                        goto out_lane;
1087
1088                if (bip) {
1089                        ret = btt_rw_integrity(btt, bip, arena, new_postmap,
1090                                                WRITE);
1091                        if (ret)
1092                                goto out_lane;
1093                }
1094
1095                lock_map(arena, premap);
1096                ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
1097                if (ret)
1098                        goto out_map;
1099                if (old_postmap >= arena->internal_nlba) {
1100                        ret = -EIO;
1101                        goto out_map;
1102                }
1103
1104                log.lba = cpu_to_le32(premap);
1105                log.old_map = cpu_to_le32(old_postmap);
1106                log.new_map = cpu_to_le32(new_postmap);
1107                log.seq = cpu_to_le32(arena->freelist[lane].seq);
1108                sub = arena->freelist[lane].sub;
1109                ret = btt_flog_write(arena, lane, sub, &log);
1110                if (ret)
1111                        goto out_map;
1112
1113                ret = btt_map_write(arena, premap, new_postmap, 0, 0);
1114                if (ret)
1115                        goto out_map;
1116
1117                unlock_map(arena, premap);
1118                nd_region_release_lane(btt->nd_region, lane);
1119
1120                len -= cur_len;
1121                off += cur_len;
1122                sector += btt->sector_size >> SECTOR_SHIFT;
1123        }
1124
1125        return 0;
1126
1127 out_map:
1128        unlock_map(arena, premap);
1129 out_lane:
1130        nd_region_release_lane(btt->nd_region, lane);
1131        return ret;
1132}
1133
1134static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
1135                        struct page *page, unsigned int len, unsigned int off,
1136                        int rw, sector_t sector)
1137{
1138        int ret;
1139
1140        if (rw == READ) {
1141                ret = btt_read_pg(btt, bip, page, off, sector, len);
1142                flush_dcache_page(page);
1143        } else {
1144                flush_dcache_page(page);
1145                ret = btt_write_pg(btt, bip, sector, page, off, len);
1146        }
1147
1148        return ret;
1149}
1150
1151static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
1152{
1153        struct bio_integrity_payload *bip = bio_integrity(bio);
1154        struct btt *btt = q->queuedata;
1155        struct bvec_iter iter;
1156        unsigned long start;
1157        struct bio_vec bvec;
1158        int err = 0, rw;
1159        bool do_acct;
1160
1161        /*
1162         * bio_integrity_enabled also checks if the bio already has an
1163         * integrity payload attached. If it does, we *don't* do a
1164         * bio_integrity_prep here - the payload has been generated by
1165         * another kernel subsystem, and we just pass it through.
1166         */
1167        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1168                bio->bi_error = -EIO;
1169                goto out;
1170        }
1171
1172        do_acct = nd_iostat_start(bio, &start);
1173        rw = bio_data_dir(bio);
1174        bio_for_each_segment(bvec, bio, iter) {
1175                unsigned int len = bvec.bv_len;
1176
1177                BUG_ON(len > PAGE_SIZE);
1178                /* Make sure len is in multiples of sector size. */
1179                /* XXX is this right? */
1180                BUG_ON(len < btt->sector_size);
1181                BUG_ON(len % btt->sector_size);
1182
1183                err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
1184                                rw, iter.bi_sector);
1185                if (err) {
1186                        dev_info(&btt->nd_btt->dev,
 1187                                        "io error in %s sector %lld, len %d\n",
1188                                        (rw == READ) ? "READ" : "WRITE",
1189                                        (unsigned long long) iter.bi_sector, len);
1190                        bio->bi_error = err;
1191                        break;
1192                }
1193        }
1194        if (do_acct)
1195                nd_iostat_end(bio, start);
1196
1197out:
1198        bio_endio(bio);
1199        return BLK_QC_T_NONE;
1200}
1201
1202static int btt_rw_page(struct block_device *bdev, sector_t sector,
1203                struct page *page, int rw)
1204{
1205        struct btt *btt = bdev->bd_disk->private_data;
1206
1207        btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, rw, sector);
1208        page_endio(page, rw & WRITE, 0);
1209        return 0;
1210}
1211
1212
1213static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
1214{
1215        /* some standard values */
1216        geo->heads = 1 << 6;
1217        geo->sectors = 1 << 5;
1218        geo->cylinders = get_capacity(bd->bd_disk) >> 11;
1219        return 0;
1220}
1221
1222static const struct block_device_operations btt_fops = {
1223        .owner =                THIS_MODULE,
1224        .rw_page =              btt_rw_page,
1225        .getgeo =               btt_getgeo,
1226        .revalidate_disk =      nvdimm_revalidate_disk,
1227};
1228
1229static int btt_blk_init(struct btt *btt)
1230{
1231        struct nd_btt *nd_btt = btt->nd_btt;
1232        struct nd_namespace_common *ndns = nd_btt->ndns;
1233
1234        /* create a new disk and request queue for btt */
1235        btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
1236        if (!btt->btt_queue)
1237                return -ENOMEM;
1238
1239        btt->btt_disk = alloc_disk(0);
1240        if (!btt->btt_disk) {
1241                blk_cleanup_queue(btt->btt_queue);
1242                return -ENOMEM;
1243        }
1244
1245        nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
1246        btt->btt_disk->driverfs_dev = &btt->nd_btt->dev;
1247        btt->btt_disk->first_minor = 0;
1248        btt->btt_disk->fops = &btt_fops;
1249        btt->btt_disk->private_data = btt;
1250        btt->btt_disk->queue = btt->btt_queue;
1251        btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
1252
1253        blk_queue_make_request(btt->btt_queue, btt_make_request);
1254        blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
1255        blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
1256        blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
1257        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
1258        btt->btt_queue->queuedata = btt;
1259
1260        set_capacity(btt->btt_disk, 0);
1261        add_disk(btt->btt_disk);
1262        if (btt_meta_size(btt)) {
1263                int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
1264
1265                if (rc) {
1266                        del_gendisk(btt->btt_disk);
1267                        put_disk(btt->btt_disk);
1268                        blk_cleanup_queue(btt->btt_queue);
1269                        return rc;
1270                }
1271        }
1272        set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
1273        revalidate_disk(btt->btt_disk);
1274
1275        return 0;
1276}
1277
1278static void btt_blk_cleanup(struct btt *btt)
1279{
1280        del_gendisk(btt->btt_disk);
1281        put_disk(btt->btt_disk);
1282        blk_cleanup_queue(btt->btt_queue);
1283}
1284
1285/**
1286 * btt_init - initialize a block translation table for the given device
1287 * @nd_btt:     device with BTT geometry and backing device info
1288 * @rawsize:    raw size in bytes of the backing device
1289 * @lbasize:    lba size of the backing device
1290 * @uuid:       A uuid for the backing device - this is stored on media
 1291 * @nd_region:  parent region of the backing device, used for lane allocation
1292 *
1293 * Initialize a Block Translation Table on a backing device to provide
1294 * single sector power fail atomicity.
1295 *
1296 * Context:
1297 * Might sleep.
1298 *
1299 * Returns:
1300 * Pointer to a new struct btt on success, NULL on failure.
1301 */
1302static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
1303                u32 lbasize, u8 *uuid, struct nd_region *nd_region)
1304{
1305        int ret;
1306        struct btt *btt;
1307        struct device *dev = &nd_btt->dev;
1308
1309        btt = kzalloc(sizeof(struct btt), GFP_KERNEL);
1310        if (!btt)
1311                return NULL;
1312
1313        btt->nd_btt = nd_btt;
1314        btt->rawsize = rawsize;
1315        btt->lbasize = lbasize;
1316        btt->sector_size = ((lbasize >= 4096) ? 4096 : 512);
1317        INIT_LIST_HEAD(&btt->arena_list);
1318        mutex_init(&btt->init_lock);
1319        btt->nd_region = nd_region;
1320
1321        ret = discover_arenas(btt);
1322        if (ret) {
1323                dev_err(dev, "init: error in arena_discover: %d\n", ret);
1324                goto out_free;
1325        }
1326
1327        if (btt->init_state != INIT_READY && nd_region->ro) {
1328                dev_info(dev, "%s is read-only, unable to init btt metadata\n",
1329                                dev_name(&nd_region->dev));
1330                goto out_free;
1331        } else if (btt->init_state != INIT_READY) {
1332                btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
1333                        ((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
1334                dev_dbg(dev, "init: %d arenas for %llu rawsize\n",
1335                                btt->num_arenas, rawsize);
1336
1337                ret = create_arenas(btt);
1338                if (ret) {
1339                        dev_info(dev, "init: create_arenas: %d\n", ret);
1340                        goto out_free;
1341                }
1342
1343                ret = btt_meta_init(btt);
1344                if (ret) {
1345                        dev_err(dev, "init: error in meta_init: %d\n", ret);
1346                        goto out_free;
1347                }
1348        }
1349
1350        ret = btt_blk_init(btt);
1351        if (ret) {
1352                dev_err(dev, "init: error in blk_init: %d\n", ret);
1353                goto out_free;
1354        }
1355
1356        btt_debugfs_init(btt);
1357
1358        return btt;
1359
1360 out_free:
1361        kfree(btt);
1362        return NULL;
1363}
1364
1365/**
1366 * btt_fini - de-initialize a BTT
1367 * @btt:        the BTT handle that was generated by btt_init
1368 *
1369 * De-initialize a Block Translation Table on device removal
1370 *
1371 * Context:
1372 * Might sleep.
1373 */
1374static void btt_fini(struct btt *btt)
1375{
1376        if (btt) {
1377                btt_blk_cleanup(btt);
1378                free_arenas(btt);
1379                debugfs_remove_recursive(btt->debugfs_dir);
1380                kfree(btt);
1381        }
1382}
1383
1384int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
1385{
1386        struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
1387        struct nd_region *nd_region;
1388        struct btt *btt;
1389        size_t rawsize;
1390
1391        if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize)
1392                return -ENODEV;
1393
1394        rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
 1395        if (rawsize < ARENA_MIN_SIZE)
 1396                return -ENXIO;
1398        nd_region = to_nd_region(nd_btt->dev.parent);
1399        btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
1400                        nd_region);
1401        if (!btt)
1402                return -ENOMEM;
1403        nd_btt->btt = btt;
1404
1405        return 0;
1406}
1407EXPORT_SYMBOL(nvdimm_namespace_attach_btt);
1408
1409int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns)
1410{
1411        struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
1412        struct btt *btt = nd_btt->btt;
1413
1414        btt_fini(btt);
1415        nd_btt->btt = NULL;
1416
1417        return 0;
1418}
1419EXPORT_SYMBOL(nvdimm_namespace_detach_btt);
1420
1421static int __init nd_btt_init(void)
1422{
1423        int rc = 0;
1424
1425        debugfs_root = debugfs_create_dir("btt", NULL);
1426        if (IS_ERR_OR_NULL(debugfs_root))
1427                rc = -ENXIO;
1428
1429        return rc;
1430}
1431
1432static void __exit nd_btt_exit(void)
1433{
1434        debugfs_remove_recursive(debugfs_root);
1435}
1436
1437MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
1438MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>");
1439MODULE_LICENSE("GPL v2");
1440module_init(nd_btt_init);
1441module_exit(nd_btt_exit);
1442