linux/lib/radix-tree.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2001 Momchil Velikov
   3 * Portions Copyright (C) 2001 Christoph Hellwig
   4 * Copyright (C) 2005 SGI, Christoph Lameter
   5 * Copyright (C) 2006 Nick Piggin
   6 * Copyright (C) 2012 Konstantin Khlebnikov
   7 *
   8 * This program is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU General Public License as
  10 * published by the Free Software Foundation; either version 2, or (at
  11 * your option) any later version.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU General Public License
  19 * along with this program; if not, write to the Free Software
  20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21 */
  22
  23#include <linux/errno.h>
  24#include <linux/init.h>
  25#include <linux/kernel.h>
  26#include <linux/export.h>
  27#include <linux/radix-tree.h>
  28#include <linux/percpu.h>
  29#include <linux/slab.h>
  30#include <linux/notifier.h>
  31#include <linux/cpu.h>
  32#include <linux/string.h>
  33#include <linux/bitops.h>
  34#include <linux/rcupdate.h>
  35
  36
  37#ifdef __KERNEL__
  38#define RADIX_TREE_MAP_SHIFT    (CONFIG_BASE_SMALL ? 4 : 6)
  39#else
  40#define RADIX_TREE_MAP_SHIFT    3       /* For more stressful testing */
  41#endif
  42
  43#define RADIX_TREE_MAP_SIZE     (1UL << RADIX_TREE_MAP_SHIFT)
  44#define RADIX_TREE_MAP_MASK     (RADIX_TREE_MAP_SIZE-1)
  45
  46#define RADIX_TREE_TAG_LONGS    \
  47        ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
  48
  49struct radix_tree_node {
  50        unsigned int    height;         /* Height from the bottom */
  51        unsigned int    count;
  52        union {
  53                struct radix_tree_node *parent; /* Used when ascending tree */
  54                struct rcu_head rcu_head;       /* Used when freeing node */
  55        };
  56        void __rcu      *slots[RADIX_TREE_MAP_SIZE];
  57        unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
  58};
  59
  60#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
  61#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
  62                                          RADIX_TREE_MAP_SHIFT))
  63
  64/*
  65 * The height_to_maxindex array needs to be one deeper than the maximum
  66 * path as height 0 holds only 1 entry.
  67 */
  68static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
  69
  70/*
  71 * Radix tree node cache.
  72 */
  73static struct kmem_cache *radix_tree_node_cachep;
  74
  75/*
  76 * The radix tree is variable-height, so an insert operation not only has
  77 * to build the branch to its corresponding item, it also has to build the
  78 * branch to existing items if the size has to be increased (by
  79 * radix_tree_extend).
  80 *
  81 * The worst case is a zero height tree with just a single item at index 0,
  82 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
  83 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
  84 * Hence:
  85 */
  86#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
  87
  88/*
  89 * Per-cpu pool of preloaded nodes
  90 */
  91struct radix_tree_preload {
  92        int nr;
  93        struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE];
  94};
  95static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  96
  97static inline void *ptr_to_indirect(void *ptr)
  98{
  99        return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
 100}
 101
 102static inline void *indirect_to_ptr(void *ptr)
 103{
 104        return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
 105}
 106
 107static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
 108{
 109        return root->gfp_mask & __GFP_BITS_MASK;
 110}
 111
 112static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
 113                int offset)
 114{
 115        __set_bit(offset, node->tags[tag]);
 116}
 117
 118static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
 119                int offset)
 120{
 121        __clear_bit(offset, node->tags[tag]);
 122}
 123
 124static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
 125                int offset)
 126{
 127        return test_bit(offset, node->tags[tag]);
 128}
 129
 130static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
 131{
 132        root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
 133}
 134
 135static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
 136{
 137        root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
 138}
 139
 140static inline void root_tag_clear_all(struct radix_tree_root *root)
 141{
 142        root->gfp_mask &= __GFP_BITS_MASK;
 143}
 144
 145static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
 146{
 147        return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
 148}
 149
 150/*
 151 * Returns 1 if any slot in the node has this tag set.
 152 * Otherwise returns 0.
 153 */
 154static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
 155{
 156        int idx;
 157        for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
 158                if (node->tags[tag][idx])
 159                        return 1;
 160        }
 161        return 0;
 162}
 163
 164/**
 165 * radix_tree_find_next_bit - find the next set bit in a memory region
 166 *
 167 * @addr: The address to base the search on
 168 * @size: The bitmap size in bits
 169 * @offset: The bitnumber to start searching at
 170 *
 171 * Unrollable variant of find_next_bit() for constant size arrays.
 172 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 173 * Returns next bit offset, or size if nothing found.
 174 */
 175static __always_inline unsigned long
 176radix_tree_find_next_bit(const unsigned long *addr,
 177                         unsigned long size, unsigned long offset)
 178{
 179        if (!__builtin_constant_p(size))
 180                return find_next_bit(addr, size, offset);
 181
 182        if (offset < size) {
 183                unsigned long tmp;
 184
 185                addr += offset / BITS_PER_LONG;
 186                tmp = *addr >> (offset % BITS_PER_LONG);
 187                if (tmp)
 188                        return __ffs(tmp) + offset;
 189                offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
 190                while (offset < size) {
 191                        tmp = *++addr;
 192                        if (tmp)
 193                                return __ffs(tmp) + offset;
 194                        offset += BITS_PER_LONG;
 195                }
 196        }
 197        return size;
 198}
 199
 200/*
 201 * This assumes that the caller has performed appropriate preallocation, and
 202 * that the caller has pinned this thread of control to the current CPU.
 203 */
 204static struct radix_tree_node *
 205radix_tree_node_alloc(struct radix_tree_root *root)
 206{
 207        struct radix_tree_node *ret = NULL;
 208        gfp_t gfp_mask = root_gfp_mask(root);
 209
 210        if (!(gfp_mask & __GFP_WAIT)) {
 211                struct radix_tree_preload *rtp;
 212
 213                /*
 214                 * Provided the caller has preloaded here, we will always
 215                 * succeed in getting a node here (and never reach
 216                 * kmem_cache_alloc)
 217                 */
 218                rtp = &__get_cpu_var(radix_tree_preloads);
 219                if (rtp->nr) {
 220                        ret = rtp->nodes[rtp->nr - 1];
 221                        rtp->nodes[rtp->nr - 1] = NULL;
 222                        rtp->nr--;
 223                }
 224        }
 225        if (ret == NULL)
 226                ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 227
 228        BUG_ON(radix_tree_is_indirect_ptr(ret));
 229        return ret;
 230}
 231
 232static void radix_tree_node_rcu_free(struct rcu_head *head)
 233{
 234        struct radix_tree_node *node =
 235                        container_of(head, struct radix_tree_node, rcu_head);
 236        int i;
 237
 238        /*
 239         * must only free zeroed nodes into the slab. radix_tree_shrink
 240         * can leave us with a non-NULL entry in the first slot, so clear
 241         * that here to make sure.
 242         */
 243        for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
 244                tag_clear(node, i, 0);
 245
 246        node->slots[0] = NULL;
 247        node->count = 0;
 248
 249        kmem_cache_free(radix_tree_node_cachep, node);
 250}
 251
 252static inline void
 253radix_tree_node_free(struct radix_tree_node *node)
 254{
 255        call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
 256}
 257
 258/*
 259 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 260 * ensure that the addition of a single element in the tree cannot fail.  On
 261 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 262 * with preemption not disabled.
 263 *
 264 * To make use of this facility, the radix tree must be initialised without
 265 * __GFP_WAIT being passed to INIT_RADIX_TREE().
 266 */
 267int radix_tree_preload(gfp_t gfp_mask)
 268{
 269        struct radix_tree_preload *rtp;
 270        struct radix_tree_node *node;
 271        int ret = -ENOMEM;
 272
 273        preempt_disable();
 274        rtp = &__get_cpu_var(radix_tree_preloads);
 275        while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
 276                preempt_enable();
 277                node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 278                if (node == NULL)
 279                        goto out;
 280                preempt_disable();
 281                rtp = &__get_cpu_var(radix_tree_preloads);
 282                if (rtp->nr < ARRAY_SIZE(rtp->nodes))
 283                        rtp->nodes[rtp->nr++] = node;
 284                else
 285                        kmem_cache_free(radix_tree_node_cachep, node);
 286        }
 287        ret = 0;
 288out:
 289        return ret;
 290}
 291EXPORT_SYMBOL(radix_tree_preload);
 292
 293/*
 294 *      Return the maximum key which can be store into a
 295 *      radix tree with height HEIGHT.
 296 */
 297static inline unsigned long radix_tree_maxindex(unsigned int height)
 298{
 299        return height_to_maxindex[height];
 300}
 301
 302/*
 303 *      Extend a radix tree so it can store key @index.
 304 */
 305static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 306{
 307        struct radix_tree_node *node;
 308        struct radix_tree_node *slot;
 309        unsigned int height;
 310        int tag;
 311
 312        /* Figure out what the height should be.  */
 313        height = root->height + 1;
 314        while (index > radix_tree_maxindex(height))
 315                height++;
 316
 317        if (root->rnode == NULL) {
 318                root->height = height;
 319                goto out;
 320        }
 321
 322        do {
 323                unsigned int newheight;
 324                if (!(node = radix_tree_node_alloc(root)))
 325                        return -ENOMEM;
 326
 327                /* Propagate the aggregated tag info into the new root */
 328                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
 329                        if (root_tag_get(root, tag))
 330                                tag_set(node, tag, 0);
 331                }
 332
 333                /* Increase the height.  */
 334                newheight = root->height+1;
 335                node->height = newheight;
 336                node->count = 1;
 337                node->parent = NULL;
 338                slot = root->rnode;
 339                if (newheight > 1) {
 340                        slot = indirect_to_ptr(slot);
 341                        slot->parent = node;
 342                }
 343                node->slots[0] = slot;
 344                node = ptr_to_indirect(node);
 345                rcu_assign_pointer(root->rnode, node);
 346                root->height = newheight;
 347        } while (height > root->height);
 348out:
 349        return 0;
 350}
 351
 352/**
 353 *      radix_tree_insert    -    insert into a radix tree
 354 *      @root:          radix tree root
 355 *      @index:         index key
 356 *      @item:          item to insert
 357 *
 358 *      Insert an item into the radix tree at position @index.
 359 */
 360int radix_tree_insert(struct radix_tree_root *root,
 361                        unsigned long index, void *item)
 362{
 363        struct radix_tree_node *node = NULL, *slot;
 364        unsigned int height, shift;
 365        int offset;
 366        int error;
 367
 368        BUG_ON(radix_tree_is_indirect_ptr(item));
 369
 370        /* Make sure the tree is high enough.  */
 371        if (index > radix_tree_maxindex(root->height)) {
 372                error = radix_tree_extend(root, index);
 373                if (error)
 374                        return error;
 375        }
 376
 377        slot = indirect_to_ptr(root->rnode);
 378
 379        height = root->height;
 380        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 381
 382        offset = 0;                     /* uninitialised var warning */
 383        while (height > 0) {
 384                if (slot == NULL) {
 385                        /* Have to add a child node.  */
 386                        if (!(slot = radix_tree_node_alloc(root)))
 387                                return -ENOMEM;
 388                        slot->height = height;
 389                        slot->parent = node;
 390                        if (node) {
 391                                rcu_assign_pointer(node->slots[offset], slot);
 392                                node->count++;
 393                        } else
 394                                rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
 395                }
 396
 397                /* Go a level down */
 398                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 399                node = slot;
 400                slot = node->slots[offset];
 401                shift -= RADIX_TREE_MAP_SHIFT;
 402                height--;
 403        }
 404
 405        if (slot != NULL)
 406                return -EEXIST;
 407
 408        if (node) {
 409                node->count++;
 410                rcu_assign_pointer(node->slots[offset], item);
 411                BUG_ON(tag_get(node, 0, offset));
 412                BUG_ON(tag_get(node, 1, offset));
 413        } else {
 414                rcu_assign_pointer(root->rnode, item);
 415                BUG_ON(root_tag_get(root, 0));
 416                BUG_ON(root_tag_get(root, 1));
 417        }
 418
 419        return 0;
 420}
 421EXPORT_SYMBOL(radix_tree_insert);
 422
 423/*
 424 * is_slot == 1 : search for the slot.
 425 * is_slot == 0 : search for the node.
 426 */
 427static void *radix_tree_lookup_element(struct radix_tree_root *root,
 428                                unsigned long index, int is_slot)
 429{
 430        unsigned int height, shift;
 431        struct radix_tree_node *node, **slot;
 432
 433        node = rcu_dereference_raw(root->rnode);
 434        if (node == NULL)
 435                return NULL;
 436
 437        if (!radix_tree_is_indirect_ptr(node)) {
 438                if (index > 0)
 439                        return NULL;
 440                return is_slot ? (void *)&root->rnode : node;
 441        }
 442        node = indirect_to_ptr(node);
 443
 444        height = node->height;
 445        if (index > radix_tree_maxindex(height))
 446                return NULL;
 447
 448        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 449
 450        do {
 451                slot = (struct radix_tree_node **)
 452                        (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
 453                node = rcu_dereference_raw(*slot);
 454                if (node == NULL)
 455                        return NULL;
 456
 457                shift -= RADIX_TREE_MAP_SHIFT;
 458                height--;
 459        } while (height > 0);
 460
 461        return is_slot ? (void *)slot : indirect_to_ptr(node);
 462}
 463
 464/**
 465 *      radix_tree_lookup_slot    -    lookup a slot in a radix tree
 466 *      @root:          radix tree root
 467 *      @index:         index key
 468 *
 469 *      Returns:  the slot corresponding to the position @index in the
 470 *      radix tree @root. This is useful for update-if-exists operations.
 471 *
 472 *      This function can be called under rcu_read_lock iff the slot is not
 473 *      modified by radix_tree_replace_slot, otherwise it must be called
 474 *      exclusive from other writers. Any dereference of the slot must be done
 475 *      using radix_tree_deref_slot.
 476 */
 477void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 478{
 479        return (void **)radix_tree_lookup_element(root, index, 1);
 480}
 481EXPORT_SYMBOL(radix_tree_lookup_slot);
 482
 483/**
 484 *      radix_tree_lookup    -    perform lookup operation on a radix tree
 485 *      @root:          radix tree root
 486 *      @index:         index key
 487 *
 488 *      Lookup the item at the position @index in the radix tree @root.
 489 *
 490 *      This function can be called under rcu_read_lock, however the caller
 491 *      must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 492 *      them safely). No RCU barriers are required to access or modify the
 493 *      returned item, however.
 494 */
 495void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 496{
 497        return radix_tree_lookup_element(root, index, 0);
 498}
 499EXPORT_SYMBOL(radix_tree_lookup);
 500
 501/**
 502 *      radix_tree_tag_set - set a tag on a radix tree node
 503 *      @root:          radix tree root
 504 *      @index:         index key
 505 *      @tag:           tag index
 506 *
 507 *      Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 508 *      corresponding to @index in the radix tree.  From
 509 *      the root all the way down to the leaf node.
 510 *
 511 *      Returns the address of the tagged item.   Setting a tag on a not-present
 512 *      item is a bug.
 513 */
 514void *radix_tree_tag_set(struct radix_tree_root *root,
 515                        unsigned long index, unsigned int tag)
 516{
 517        unsigned int height, shift;
 518        struct radix_tree_node *slot;
 519
 520        height = root->height;
 521        BUG_ON(index > radix_tree_maxindex(height));
 522
 523        slot = indirect_to_ptr(root->rnode);
 524        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 525
 526        while (height > 0) {
 527                int offset;
 528
 529                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 530                if (!tag_get(slot, tag, offset))
 531                        tag_set(slot, tag, offset);
 532                slot = slot->slots[offset];
 533                BUG_ON(slot == NULL);
 534                shift -= RADIX_TREE_MAP_SHIFT;
 535                height--;
 536        }
 537
 538        /* set the root's tag bit */
 539        if (slot && !root_tag_get(root, tag))
 540                root_tag_set(root, tag);
 541
 542        return slot;
 543}
 544EXPORT_SYMBOL(radix_tree_tag_set);
 545
 546/**
 547 *      radix_tree_tag_clear - clear a tag on a radix tree node
 548 *      @root:          radix tree root
 549 *      @index:         index key
 550 *      @tag:           tag index
 551 *
 552 *      Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 553 *      corresponding to @index in the radix tree.  If
 554 *      this causes the leaf node to have no tags set then clear the tag in the
 555 *      next-to-leaf node, etc.
 556 *
 557 *      Returns the address of the tagged item on success, else NULL.  ie:
 558 *      has the same return value and semantics as radix_tree_lookup().
 559 */
 560void *radix_tree_tag_clear(struct radix_tree_root *root,
 561                        unsigned long index, unsigned int tag)
 562{
 563        struct radix_tree_node *node = NULL;
 564        struct radix_tree_node *slot = NULL;
 565        unsigned int height, shift;
 566        int uninitialized_var(offset);
 567
 568        height = root->height;
 569        if (index > radix_tree_maxindex(height))
 570                goto out;
 571
 572        shift = height * RADIX_TREE_MAP_SHIFT;
 573        slot = indirect_to_ptr(root->rnode);
 574
 575        while (shift) {
 576                if (slot == NULL)
 577                        goto out;
 578
 579                shift -= RADIX_TREE_MAP_SHIFT;
 580                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 581                node = slot;
 582                slot = slot->slots[offset];
 583        }
 584
 585        if (slot == NULL)
 586                goto out;
 587
 588        while (node) {
 589                if (!tag_get(node, tag, offset))
 590                        goto out;
 591                tag_clear(node, tag, offset);
 592                if (any_tag_set(node, tag))
 593                        goto out;
 594
 595                index >>= RADIX_TREE_MAP_SHIFT;
 596                offset = index & RADIX_TREE_MAP_MASK;
 597                node = node->parent;
 598        }
 599
 600        /* clear the root's tag bit */
 601        if (root_tag_get(root, tag))
 602                root_tag_clear(root, tag);
 603
 604out:
 605        return slot;
 606}
 607EXPORT_SYMBOL(radix_tree_tag_clear);
 608
 609/**
 610 * radix_tree_tag_get - get a tag on a radix tree node
 611 * @root:               radix tree root
 612 * @index:              index key
 613 * @tag:                tag index (< RADIX_TREE_MAX_TAGS)
 614 *
 615 * Return values:
 616 *
 617 *  0: tag not present or not set
 618 *  1: tag set
 619 *
 620 * Note that the return value of this function may not be relied on, even if
 621 * the RCU lock is held, unless tag modification and node deletion are excluded
 622 * from concurrency.
 623 */
 624int radix_tree_tag_get(struct radix_tree_root *root,
 625                        unsigned long index, unsigned int tag)
 626{
 627        unsigned int height, shift;
 628        struct radix_tree_node *node;
 629
 630        /* check the root's tag bit */
 631        if (!root_tag_get(root, tag))
 632                return 0;
 633
 634        node = rcu_dereference_raw(root->rnode);
 635        if (node == NULL)
 636                return 0;
 637
 638        if (!radix_tree_is_indirect_ptr(node))
 639                return (index == 0);
 640        node = indirect_to_ptr(node);
 641
 642        height = node->height;
 643        if (index > radix_tree_maxindex(height))
 644                return 0;
 645
 646        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 647
 648        for ( ; ; ) {
 649                int offset;
 650
 651                if (node == NULL)
 652                        return 0;
 653
 654                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 655                if (!tag_get(node, tag, offset))
 656                        return 0;
 657                if (height == 1)
 658                        return 1;
 659                node = rcu_dereference_raw(node->slots[offset]);
 660                shift -= RADIX_TREE_MAP_SHIFT;
 661                height--;
 662        }
 663}
 664EXPORT_SYMBOL(radix_tree_tag_get);
 665
 666/**
 667 * radix_tree_next_chunk - find next chunk of slots for iteration
 668 *
 669 * @root:       radix tree root
 670 * @iter:       iterator state
 671 * @flags:      RADIX_TREE_ITER_* flags and tag index
 672 * Returns:     pointer to chunk first slot, or NULL if iteration is over
 673 */
 674void **radix_tree_next_chunk(struct radix_tree_root *root,
 675                             struct radix_tree_iter *iter, unsigned flags)
 676{
 677        unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
 678        struct radix_tree_node *rnode, *node;
 679        unsigned long index, offset;
 680
 681        if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
 682                return NULL;
 683
 684        /*
 685         * Catch next_index overflow after ~0UL. iter->index never overflows
 686         * during iterating; it can be zero only at the beginning.
 687         * And we cannot overflow iter->next_index in a single step,
 688         * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
 689         *
 690         * This condition also used by radix_tree_next_slot() to stop
 691         * contiguous iterating, and forbid swithing to the next chunk.
 692         */
 693        index = iter->next_index;
 694        if (!index && iter->index)
 695                return NULL;
 696
 697        rnode = rcu_dereference_raw(root->rnode);
 698        if (radix_tree_is_indirect_ptr(rnode)) {
 699                rnode = indirect_to_ptr(rnode);
 700        } else if (rnode && !index) {
 701                /* Single-slot tree */
 702                iter->index = 0;
 703                iter->next_index = 1;
 704                iter->tags = 1;
 705                return (void **)&root->rnode;
 706        } else
 707                return NULL;
 708
 709restart:
 710        shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
 711        offset = index >> shift;
 712
 713        /* Index outside of the tree */
 714        if (offset >= RADIX_TREE_MAP_SIZE)
 715                return NULL;
 716
 717        node = rnode;
 718        while (1) {
 719                if ((flags & RADIX_TREE_ITER_TAGGED) ?
 720                                !test_bit(offset, node->tags[tag]) :
 721                                !node->slots[offset]) {
 722                        /* Hole detected */
 723                        if (flags & RADIX_TREE_ITER_CONTIG)
 724                                return NULL;
 725
 726                        if (flags & RADIX_TREE_ITER_TAGGED)
 727                                offset = radix_tree_find_next_bit(
 728                                                node->tags[tag],
 729                                                RADIX_TREE_MAP_SIZE,
 730                                                offset + 1);
 731                        else
 732                                while (++offset < RADIX_TREE_MAP_SIZE) {
 733                                        if (node->slots[offset])
 734                                                break;
 735                                }
 736                        index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
 737                        index += offset << shift;
 738                        /* Overflow after ~0UL */
 739                        if (!index)
 740                                return NULL;
 741                        if (offset == RADIX_TREE_MAP_SIZE)
 742                                goto restart;
 743                }
 744
 745                /* This is leaf-node */
 746                if (!shift)
 747                        break;
 748
 749                node = rcu_dereference_raw(node->slots[offset]);
 750                if (node == NULL)
 751                        goto restart;
 752                shift -= RADIX_TREE_MAP_SHIFT;
 753                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 754        }
 755
 756        /* Update the iterator state */
 757        iter->index = index;
 758        iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
 759
 760        /* Construct iter->tags bit-mask from node->tags[tag] array */
 761        if (flags & RADIX_TREE_ITER_TAGGED) {
 762                unsigned tag_long, tag_bit;
 763
 764                tag_long = offset / BITS_PER_LONG;
 765                tag_bit  = offset % BITS_PER_LONG;
 766                iter->tags = node->tags[tag][tag_long] >> tag_bit;
 767                /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
 768                if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
 769                        /* Pick tags from next element */
 770                        if (tag_bit)
 771                                iter->tags |= node->tags[tag][tag_long + 1] <<
 772                                                (BITS_PER_LONG - tag_bit);
 773                        /* Clip chunk size, here only BITS_PER_LONG tags */
 774                        iter->next_index = index + BITS_PER_LONG;
 775                }
 776        }
 777
 778        return node->slots + offset;
 779}
 780EXPORT_SYMBOL(radix_tree_next_chunk);
 781
 782/**
 783 * radix_tree_range_tag_if_tagged - for each item in given range set given
 784 *                                 tag if item has another tag set
 785 * @root:               radix tree root
 786 * @first_indexp:       pointer to a starting index of a range to scan
 787 * @last_index:         last index of a range to scan
 788 * @nr_to_tag:          maximum number items to tag
 789 * @iftag:              tag index to test
 790 * @settag:             tag index to set if tested tag is set
 791 *
 792 * This function scans range of radix tree from first_index to last_index
 793 * (inclusive).  For each item in the range if iftag is set, the function sets
 794 * also settag. The function stops either after tagging nr_to_tag items or
 795 * after reaching last_index.
 796 *
 797 * The tags must be set from the leaf level only and propagated back up the
 798 * path to the root. We must do this so that we resolve the full path before
 799 * setting any tags on intermediate nodes. If we set tags as we descend, then
 800 * we can get to the leaf node and find that the index that has the iftag
 801 * set is outside the range we are scanning. This reults in dangling tags and
 802 * can lead to problems with later tag operations (e.g. livelocks on lookups).
 803 *
 804 * The function returns number of leaves where the tag was set and sets
 805 * *first_indexp to the first unscanned index.
 806 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
 807 * be prepared to handle that.
 808 */
 809unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 810                unsigned long *first_indexp, unsigned long last_index,
 811                unsigned long nr_to_tag,
 812                unsigned int iftag, unsigned int settag)
 813{
 814        unsigned int height = root->height;
 815        struct radix_tree_node *node = NULL;
 816        struct radix_tree_node *slot;
 817        unsigned int shift;
 818        unsigned long tagged = 0;
 819        unsigned long index = *first_indexp;
 820
 821        last_index = min(last_index, radix_tree_maxindex(height));
 822        if (index > last_index)
 823                return 0;
 824        if (!nr_to_tag)
 825                return 0;
 826        if (!root_tag_get(root, iftag)) {
 827                *first_indexp = last_index + 1;
 828                return 0;
 829        }
 830        if (height == 0) {
 831                *first_indexp = last_index + 1;
 832                root_tag_set(root, settag);
 833                return 1;
 834        }
 835
 836        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 837        slot = indirect_to_ptr(root->rnode);
 838
 839        for (;;) {
 840                unsigned long upindex;
 841                int offset;
 842
 843                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 844                if (!slot->slots[offset])
 845                        goto next;
 846                if (!tag_get(slot, iftag, offset))
 847                        goto next;
 848                if (shift) {
 849                        /* Go down one level */
 850                        shift -= RADIX_TREE_MAP_SHIFT;
 851                        node = slot;
 852                        slot = slot->slots[offset];
 853                        continue;
 854                }
 855
 856                /* tag the leaf */
 857                tagged++;
 858                tag_set(slot, settag, offset);
 859
 860                /* walk back up the path tagging interior nodes */
 861                upindex = index;
 862                while (node) {
 863                        upindex >>= RADIX_TREE_MAP_SHIFT;
 864                        offset = upindex & RADIX_TREE_MAP_MASK;
 865
 866                        /* stop if we find a node with the tag already set */
 867                        if (tag_get(node, settag, offset))
 868                                break;
 869                        tag_set(node, settag, offset);
 870                        node = node->parent;
 871                }
 872
 873                /*
 874                 * Small optimization: now clear that node pointer.
 875                 * Since all of this slot's ancestors now have the tag set
 876                 * from setting it above, we have no further need to walk
 877                 * back up the tree setting tags, until we update slot to
 878                 * point to another radix_tree_node.
 879                 */
 880                node = NULL;
 881
 882next:
 883                /* Go to next item at level determined by 'shift' */
 884                index = ((index >> shift) + 1) << shift;
 885                /* Overflow can happen when last_index is ~0UL... */
 886                if (index > last_index || !index)
 887                        break;
 888                if (tagged >= nr_to_tag)
 889                        break;
 890                while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
 891                        /*
 892                         * We've fully scanned this node. Go up. Because
 893                         * last_index is guaranteed to be in the tree, what
 894                         * we do below cannot wander astray.
 895                         */
 896                        slot = slot->parent;
 897                        shift += RADIX_TREE_MAP_SHIFT;
 898                }
 899        }
 900        /*
 901         * We need not to tag the root tag if there is no tag which is set with
 902         * settag within the range from *first_indexp to last_index.
 903         */
 904        if (tagged > 0)
 905                root_tag_set(root, settag);
 906        *first_indexp = index;
 907
 908        return tagged;
 909}
 910EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
 911
 912
 913/**
 914 *      radix_tree_next_hole    -    find the next hole (not-present entry)
 915 *      @root:          tree root
 916 *      @index:         index key
 917 *      @max_scan:      maximum range to search
 918 *
 919 *      Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 920 *      indexed hole.
 921 *
 922 *      Returns: the index of the hole if found, otherwise returns an index
 923 *      outside of the set specified (in which case 'return - index >= max_scan'
 924 *      will be true). In rare cases of index wrap-around, 0 will be returned.
 925 *
 926 *      radix_tree_next_hole may be called under rcu_read_lock. However, like
 927 *      radix_tree_gang_lookup, this will not atomically search a snapshot of
 928 *      the tree at a single point in time. For example, if a hole is created
 929 *      at index 5, then subsequently a hole is created at index 10,
 930 *      radix_tree_next_hole covering both indexes may return 10 if called
 931 *      under rcu_read_lock.
 932 */
 933unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 934                                unsigned long index, unsigned long max_scan)
 935{
 936        unsigned long i;
 937
 938        for (i = 0; i < max_scan; i++) {
 939                if (!radix_tree_lookup(root, index))
 940                        break;
 941                index++;
 942                if (index == 0)
 943                        break;
 944        }
 945
 946        return index;
 947}
 948EXPORT_SYMBOL(radix_tree_next_hole);
 949
 950/**
 951 *      radix_tree_prev_hole    -    find the prev hole (not-present entry)
 952 *      @root:          tree root
 953 *      @index:         index key
 954 *      @max_scan:      maximum range to search
 955 *
 956 *      Search backwards in the range [max(index-max_scan+1, 0), index]
 957 *      for the first hole.
 958 *
 959 *      Returns: the index of the hole if found, otherwise returns an index
 960 *      outside of the set specified (in which case 'index - return >= max_scan'
 961 *      will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
 962 *
 963 *      radix_tree_next_hole may be called under rcu_read_lock. However, like
 964 *      radix_tree_gang_lookup, this will not atomically search a snapshot of
 965 *      the tree at a single point in time. For example, if a hole is created
 966 *      at index 10, then subsequently a hole is created at index 5,
 967 *      radix_tree_prev_hole covering both indexes may return 5 if called under
 968 *      rcu_read_lock.
 969 */
 970unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 971                                   unsigned long index, unsigned long max_scan)
 972{
 973        unsigned long i;
 974
 975        for (i = 0; i < max_scan; i++) {
 976                if (!radix_tree_lookup(root, index))
 977                        break;
 978                index--;
 979                if (index == ULONG_MAX)
 980                        break;
 981        }
 982
 983        return index;
 984}
 985EXPORT_SYMBOL(radix_tree_prev_hole);
 986
 987/**
 988 *      radix_tree_gang_lookup - perform multiple lookup on a radix tree
 989 *      @root:          radix tree root
 990 *      @results:       where the results of the lookup are placed
 991 *      @first_index:   start the lookup from this key
 992 *      @max_items:     place up to this many items at *results
 993 *
 994 *      Performs an index-ascending scan of the tree for present items.  Places
 995 *      them at *@results and returns the number of items which were placed at
 996 *      *@results.
 997 *
 998 *      The implementation is naive.
 999 *
1000 *      Like radix_tree_lookup, radix_tree_gang_lookup may be called under
1001 *      rcu_read_lock. In this case, rather than the returned results being
1002 *      an atomic snapshot of the tree at a single point in time, the semantics
1003 *      of an RCU protected gang lookup are as though multiple radix_tree_lookups
1004 *      have been issued in individual locks, and results stored in 'results'.
1005 */
1006unsigned int
1007radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
1008                        unsigned long first_index, unsigned int max_items)
1009{
1010        struct radix_tree_iter iter;
1011        void **slot;
1012        unsigned int ret = 0;
1013
1014        if (unlikely(!max_items))
1015                return 0;
1016
1017        radix_tree_for_each_slot(slot, root, &iter, first_index) {
1018                results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
1019                if (!results[ret])
1020                        continue;
1021                if (++ret == max_items)
1022                        break;
1023        }
1024
1025        return ret;
1026}
1027EXPORT_SYMBOL(radix_tree_gang_lookup);
1028
1029/**
1030 *      radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
1031 *      @root:          radix tree root
1032 *      @results:       where the results of the lookup are placed
1033 *      @indices:       where their indices should be placed (but usually NULL)
1034 *      @first_index:   start the lookup from this key
1035 *      @max_items:     place up to this many items at *results
1036 *
1037 *      Performs an index-ascending scan of the tree for present items.  Places
1038 *      their slots at *@results and returns the number of items which were
1039 *      placed at *@results.
1040 *
1041 *      The implementation is naive.
1042 *
1043 *      Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
1044 *      be dereferenced with radix_tree_deref_slot, and if using only RCU
1045 *      protection, radix_tree_deref_slot may fail requiring a retry.
1046 */
1047unsigned int
1048radix_tree_gang_lookup_slot(struct radix_tree_root *root,
1049                        void ***results, unsigned long *indices,
1050                        unsigned long first_index, unsigned int max_items)
1051{
1052        struct radix_tree_iter iter;
1053        void **slot;
1054        unsigned int ret = 0;
1055
1056        if (unlikely(!max_items))
1057                return 0;
1058
1059        radix_tree_for_each_slot(slot, root, &iter, first_index) {
1060                results[ret] = slot;
1061                if (indices)
1062                        indices[ret] = iter.index;
1063                if (++ret == max_items)
1064                        break;
1065        }
1066
1067        return ret;
1068}
1069EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
1070
1071/**
1072 *      radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
1073 *                                   based on a tag
1074 *      @root:          radix tree root
1075 *      @results:       where the results of the lookup are placed
1076 *      @first_index:   start the lookup from this key
1077 *      @max_items:     place up to this many items at *results
1078 *      @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
1079 *
1080 *      Performs an index-ascending scan of the tree for present items which
1081 *      have the tag indexed by @tag set.  Places the items at *@results and
1082 *      returns the number of items which were placed at *@results.
1083 */
1084unsigned int
1085radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1086                unsigned long first_index, unsigned int max_items,
1087                unsigned int tag)
1088{
1089        struct radix_tree_iter iter;
1090        void **slot;
1091        unsigned int ret = 0;
1092
1093        if (unlikely(!max_items))
1094                return 0;
1095
1096        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
1097                results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
1098                if (!results[ret])
1099                        continue;
1100                if (++ret == max_items)
1101                        break;
1102        }
1103
1104        return ret;
1105}
1106EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
1107
1108/**
1109 *      radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
1110 *                                        radix tree based on a tag
1111 *      @root:          radix tree root
1112 *      @results:       where the results of the lookup are placed
1113 *      @first_index:   start the lookup from this key
1114 *      @max_items:     place up to this many items at *results
1115 *      @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
1116 *
1117 *      Performs an index-ascending scan of the tree for present items which
1118 *      have the tag indexed by @tag set.  Places the slots at *@results and
1119 *      returns the number of slots which were placed at *@results.
1120 */
1121unsigned int
1122radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1123                unsigned long first_index, unsigned int max_items,
1124                unsigned int tag)
1125{
1126        struct radix_tree_iter iter;
1127        void **slot;
1128        unsigned int ret = 0;
1129
1130        if (unlikely(!max_items))
1131                return 0;
1132
1133        radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
1134                results[ret] = slot;
1135                if (++ret == max_items)
1136                        break;
1137        }
1138
1139        return ret;
1140}
1141EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1142
1143#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
1144#include <linux/sched.h> /* for cond_resched() */
1145
1146/*
1147 * This linear search is at present only useful to shmem_unuse_inode().
1148 */
1149static unsigned long __locate(struct radix_tree_node *slot, void *item,
1150                              unsigned long index, unsigned long *found_index)
1151{
1152        unsigned int shift, height;
1153        unsigned long i;
1154
1155        height = slot->height;
1156        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1157
1158        for ( ; height > 1; height--) {
1159                i = (index >> shift) & RADIX_TREE_MAP_MASK;
1160                for (;;) {
1161                        if (slot->slots[i] != NULL)
1162                                break;
1163                        index &= ~((1UL << shift) - 1);
1164                        index += 1UL << shift;
1165                        if (index == 0)
1166                                goto out;       /* 32-bit wraparound */
1167                        i++;
1168                        if (i == RADIX_TREE_MAP_SIZE)
1169                                goto out;
1170                }
1171
1172                shift -= RADIX_TREE_MAP_SHIFT;
1173                slot = rcu_dereference_raw(slot->slots[i]);
1174                if (slot == NULL)
1175                        goto out;
1176        }
1177
1178        /* Bottom level: check items */
1179        for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
1180                if (slot->slots[i] == item) {
1181                        *found_index = index + i;
1182                        index = 0;
1183                        goto out;
1184                }
1185        }
1186        index += RADIX_TREE_MAP_SIZE;
1187out:
1188        return index;
1189}
1190
1191/**
1192 *      radix_tree_locate_item - search through radix tree for item
1193 *      @root:          radix tree root
1194 *      @item:          item to be found
1195 *
1196 *      Returns index where item was found, or -1 if not found.
1197 *      Caller must hold no lock (since this time-consuming function needs
1198 *      to be preemptible), and must check afterwards if item is still there.
1199 */
1200unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1201{
1202        struct radix_tree_node *node;
1203        unsigned long max_index;
1204        unsigned long cur_index = 0;
1205        unsigned long found_index = -1;
1206
1207        do {
1208                rcu_read_lock();
1209                node = rcu_dereference_raw(root->rnode);
1210                if (!radix_tree_is_indirect_ptr(node)) {
1211                        rcu_read_unlock();
1212                        if (node == item)
1213                                found_index = 0;
1214                        break;
1215                }
1216
1217                node = indirect_to_ptr(node);
1218                max_index = radix_tree_maxindex(node->height);
1219                if (cur_index > max_index)
1220                        break;
1221
1222                cur_index = __locate(node, item, cur_index, &found_index);
1223                rcu_read_unlock();
1224                cond_resched();
1225        } while (cur_index != 0 && cur_index <= max_index);
1226
1227        return found_index;
1228}
1229#else
1230unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1231{
1232        return -1;
1233}
1234#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1235
1236/**
1237 *      radix_tree_shrink    -    shrink height of a radix tree to minimal
1238 *      @root           radix tree root
1239 */
1240static inline void radix_tree_shrink(struct radix_tree_root *root)
1241{
1242        /* try to shrink tree height */
1243        while (root->height > 0) {
1244                struct radix_tree_node *to_free = root->rnode;
1245                struct radix_tree_node *slot;
1246
1247                BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1248                to_free = indirect_to_ptr(to_free);
1249
1250                /*
1251                 * The candidate node has more than one child, or its child
1252                 * is not at the leftmost slot, we cannot shrink.
1253                 */
1254                if (to_free->count != 1)
1255                        break;
1256                if (!to_free->slots[0])
1257                        break;
1258
1259                /*
1260                 * We don't need rcu_assign_pointer(), since we are simply
1261                 * moving the node from one part of the tree to another: if it
1262                 * was safe to dereference the old pointer to it
1263                 * (to_free->slots[0]), it will be safe to dereference the new
1264                 * one (root->rnode) as far as dependent read barriers go.
1265                 */
1266                slot = to_free->slots[0];
1267                if (root->height > 1) {
1268                        slot->parent = NULL;
1269                        slot = ptr_to_indirect(slot);
1270                }
1271                root->rnode = slot;
1272                root->height--;
1273
1274                /*
1275                 * We have a dilemma here. The node's slot[0] must not be
1276                 * NULLed in case there are concurrent lookups expecting to
1277                 * find the item. However if this was a bottom-level node,
1278                 * then it may be subject to the slot pointer being visible
1279                 * to callers dereferencing it. If item corresponding to
1280                 * slot[0] is subsequently deleted, these callers would expect
1281                 * their slot to become empty sooner or later.
1282                 *
1283                 * For example, lockless pagecache will look up a slot, deref
1284                 * the page pointer, and if the page is 0 refcount it means it
1285                 * was concurrently deleted from pagecache so try the deref
1286                 * again. Fortunately there is already a requirement for logic
1287                 * to retry the entire slot lookup -- the indirect pointer
1288                 * problem (replacing direct root node with an indirect pointer
1289                 * also results in a stale slot). So tag the slot as indirect
1290                 * to force callers to retry.
1291                 */
1292                if (root->height == 0)
1293                        *((unsigned long *)&to_free->slots[0]) |=
1294                                                RADIX_TREE_INDIRECT_PTR;
1295
1296                radix_tree_node_free(to_free);
1297        }
1298}
1299
1300/**
1301 *      radix_tree_delete    -    delete an item from a radix tree
1302 *      @root:          radix tree root
1303 *      @index:         index key
1304 *
1305 *      Remove the item at @index from the radix tree rooted at @root.
1306 *
1307 *      Returns the address of the deleted item, or NULL if it was not present.
1308 */
1309void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1310{
1311        struct radix_tree_node *node = NULL;
1312        struct radix_tree_node *slot = NULL;
1313        struct radix_tree_node *to_free;
1314        unsigned int height, shift;
1315        int tag;
1316        int uninitialized_var(offset);
1317
1318        height = root->height;
1319        if (index > radix_tree_maxindex(height))
1320                goto out;
1321
1322        slot = root->rnode;
1323        if (height == 0) {
1324                root_tag_clear_all(root);
1325                root->rnode = NULL;
1326                goto out;
1327        }
1328        slot = indirect_to_ptr(slot);
1329        shift = height * RADIX_TREE_MAP_SHIFT;
1330
1331        do {
1332                if (slot == NULL)
1333                        goto out;
1334
1335                shift -= RADIX_TREE_MAP_SHIFT;
1336                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1337                node = slot;
1338                slot = slot->slots[offset];
1339        } while (shift);
1340
1341        if (slot == NULL)
1342                goto out;
1343
1344        /*
1345         * Clear all tags associated with the item to be deleted.
1346         * This way of doing it would be inefficient, but seldom is any set.
1347         */
1348        for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1349                if (tag_get(node, tag, offset))
1350                        radix_tree_tag_clear(root, index, tag);
1351        }
1352
1353        to_free = NULL;
1354        /* Now free the nodes we do not need anymore */
1355        while (node) {
1356                node->slots[offset] = NULL;
1357                node->count--;
1358                /*
1359                 * Queue the node for deferred freeing after the
1360                 * last reference to it disappears (set NULL, above).
1361                 */
1362                if (to_free)
1363                        radix_tree_node_free(to_free);
1364
1365                if (node->count) {
1366                        if (node == indirect_to_ptr(root->rnode))
1367                                radix_tree_shrink(root);
1368                        goto out;
1369                }
1370
1371                /* Node with zero slots in use so free it */
1372                to_free = node;
1373
1374                index >>= RADIX_TREE_MAP_SHIFT;
1375                offset = index & RADIX_TREE_MAP_MASK;
1376                node = node->parent;
1377        }
1378
1379        root_tag_clear_all(root);
1380        root->height = 0;
1381        root->rnode = NULL;
1382        if (to_free)
1383                radix_tree_node_free(to_free);
1384
1385out:
1386        return slot;
1387}
1388EXPORT_SYMBOL(radix_tree_delete);
1389
1390/**
1391 *      radix_tree_tagged - test whether any items in the tree are tagged
1392 *      @root:          radix tree root
1393 *      @tag:           tag to test
1394 */
1395int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
1396{
1397        return root_tag_get(root, tag);
1398}
1399EXPORT_SYMBOL(radix_tree_tagged);
1400
1401static void
1402radix_tree_node_ctor(void *node)
1403{
1404        memset(node, 0, sizeof(struct radix_tree_node));
1405}
1406
1407static __init unsigned long __maxindex(unsigned int height)
1408{
1409        unsigned int width = height * RADIX_TREE_MAP_SHIFT;
1410        int shift = RADIX_TREE_INDEX_BITS - width;
1411
1412        if (shift < 0)
1413                return ~0UL;
1414        if (shift >= BITS_PER_LONG)
1415                return 0UL;
1416        return ~0UL >> shift;
1417}
1418
1419static __init void radix_tree_init_maxindex(void)
1420{
1421        unsigned int i;
1422
1423        for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
1424                height_to_maxindex[i] = __maxindex(i);
1425}
1426
1427static int radix_tree_callback(struct notifier_block *nfb,
1428                            unsigned long action,
1429                            void *hcpu)
1430{
1431       int cpu = (long)hcpu;
1432       struct radix_tree_preload *rtp;
1433
1434       /* Free per-cpu pool of perloaded nodes */
1435       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
1436               rtp = &per_cpu(radix_tree_preloads, cpu);
1437               while (rtp->nr) {
1438                       kmem_cache_free(radix_tree_node_cachep,
1439                                       rtp->nodes[rtp->nr-1]);
1440                       rtp->nodes[rtp->nr-1] = NULL;
1441                       rtp->nr--;
1442               }
1443       }
1444       return NOTIFY_OK;
1445}
1446
1447void __init radix_tree_init(void)
1448{
1449        radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
1450                        sizeof(struct radix_tree_node), 0,
1451                        SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
1452                        radix_tree_node_ctor);
1453        radix_tree_init_maxindex();
1454        hotcpu_notifier(radix_tree_callback, 0);
1455}
1456