linux/lib/prio_tree.c
<<
>>
Prefs
   1/*
   2 * lib/prio_tree.c - priority search tree
   3 *
   4 * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
   5 *
   6 * This file is released under the GPL v2.
   7 *
   8 * Based on the radix priority search tree proposed by Edward M. McCreight
   9 * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
  10 *
  11 * 02Feb2004    Initial version
  12 */
  13
  14#include <linux/init.h>
  15#include <linux/mm.h>
  16#include <linux/prio_tree.h>
  17
  18/*
  19 * A clever mix of heap and radix trees forms a radix priority search tree (PST)
  20 * which is useful for storing intervals, e.g, we can consider a vma as a closed
  21 * interval of file pages [offset_begin, offset_end], and store all vmas that
  22 * map a file in a PST. Then, using the PST, we can answer a stabbing query,
  23 * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
  24 * given input interval X (a set of consecutive file pages), in "O(log n + m)"
  25 * time where 'log n' is the height of the PST, and 'm' is the number of stored
  26 * intervals (vmas) that overlap (map) with the input interval X (the set of
  27 * consecutive file pages).
  28 *
  29 * In our implementation, we store closed intervals of the form [radix_index,
  30 * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
  31 * is designed for storing intervals with unique radix indices, i.e., each
  32 * interval have different radix_index. However, this limitation can be easily
  33 * overcome by using the size, i.e., heap_index - radix_index, as part of the
  34 * index, so we index the tree using [(radix_index,size), heap_index].
  35 *
  36 * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
  37 * machine, the maximum height of a PST can be 64. We can use a balanced version
  38 * of the priority search tree to optimize the tree height, but the balanced
  39 * tree proposed by McCreight is too complex and memory-hungry for our purpose.
  40 */
  41
  42/*
  43 * The following macros are used for implementing prio_tree for i_mmap
  44 */
  45
  46#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
  47#define VMA_SIZE(vma)     (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
  48/* avoid overflow */
  49#define HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
  50
  51
  52static void get_index(const struct prio_tree_root *root,
  53    const struct prio_tree_node *node,
  54    unsigned long *radix, unsigned long *heap)
  55{
  56        if (root->raw) {
  57                struct vm_area_struct *vma = prio_tree_entry(
  58                    node, struct vm_area_struct, shared.prio_tree_node);
  59
  60                *radix = RADIX_INDEX(vma);
  61                *heap = HEAP_INDEX(vma);
  62        }
  63        else {
  64                *radix = node->start;
  65                *heap = node->last;
  66        }
  67}
  68
  69static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
  70
  71void __init prio_tree_init(void)
  72{
  73        unsigned int i;
  74
  75        for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
  76                index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
  77        index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
  78}
  79
  80/*
  81 * Maximum heap_index that can be stored in a PST with index_bits bits
  82 */
  83static inline unsigned long prio_tree_maxindex(unsigned int bits)
  84{
  85        return index_bits_to_maxindex[bits - 1];
  86}
  87
  88/*
  89 * Extend a priority search tree so that it can store a node with heap_index
  90 * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
  91 * However, this function is used rarely and the common case performance is
  92 * not bad.
  93 */
  94static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
  95                struct prio_tree_node *node, unsigned long max_heap_index)
  96{
  97        struct prio_tree_node *first = NULL, *prev, *last = NULL;
  98
  99        if (max_heap_index > prio_tree_maxindex(root->index_bits))
 100                root->index_bits++;
 101
 102        while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
 103                root->index_bits++;
 104
 105                if (prio_tree_empty(root))
 106                        continue;
 107
 108                if (first == NULL) {
 109                        first = root->prio_tree_node;
 110                        prio_tree_remove(root, root->prio_tree_node);
 111                        INIT_PRIO_TREE_NODE(first);
 112                        last = first;
 113                } else {
 114                        prev = last;
 115                        last = root->prio_tree_node;
 116                        prio_tree_remove(root, root->prio_tree_node);
 117                        INIT_PRIO_TREE_NODE(last);
 118                        prev->left = last;
 119                        last->parent = prev;
 120                }
 121        }
 122
 123        INIT_PRIO_TREE_NODE(node);
 124
 125        if (first) {
 126                node->left = first;
 127                first->parent = node;
 128        } else
 129                last = node;
 130
 131        if (!prio_tree_empty(root)) {
 132                last->left = root->prio_tree_node;
 133                last->left->parent = last;
 134        }
 135
 136        root->prio_tree_node = node;
 137        return node;
 138}
 139
 140/*
 141 * Replace a prio_tree_node with a new node and return the old node
 142 */
 143struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
 144                struct prio_tree_node *old, struct prio_tree_node *node)
 145{
 146        INIT_PRIO_TREE_NODE(node);
 147
 148        if (prio_tree_root(old)) {
 149                BUG_ON(root->prio_tree_node != old);
 150                /*
 151                 * We can reduce root->index_bits here. However, it is complex
 152                 * and does not help much to improve performance (IMO).
 153                 */
 154                node->parent = node;
 155                root->prio_tree_node = node;
 156        } else {
 157                node->parent = old->parent;
 158                if (old->parent->left == old)
 159                        old->parent->left = node;
 160                else
 161                        old->parent->right = node;
 162        }
 163
 164        if (!prio_tree_left_empty(old)) {
 165                node->left = old->left;
 166                old->left->parent = node;
 167        }
 168
 169        if (!prio_tree_right_empty(old)) {
 170                node->right = old->right;
 171                old->right->parent = node;
 172        }
 173
 174        return old;
 175}
 176
 177/*
 178 * Insert a prio_tree_node @node into a radix priority search tree @root. The
 179 * algorithm typically takes O(log n) time where 'log n' is the number of bits
 180 * required to represent the maximum heap_index. In the worst case, the algo
 181 * can take O((log n)^2) - check prio_tree_expand.
 182 *
 183 * If a prior node with same radix_index and heap_index is already found in
 184 * the tree, then returns the address of the prior node. Otherwise, inserts
 185 * @node into the tree and returns @node.
 186 */
 187struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
 188                struct prio_tree_node *node)
 189{
 190        struct prio_tree_node *cur, *res = node;
 191        unsigned long radix_index, heap_index;
 192        unsigned long r_index, h_index, index, mask;
 193        int size_flag = 0;
 194
 195        get_index(root, node, &radix_index, &heap_index);
 196
 197        if (prio_tree_empty(root) ||
 198                        heap_index > prio_tree_maxindex(root->index_bits))
 199                return prio_tree_expand(root, node, heap_index);
 200
 201        cur = root->prio_tree_node;
 202        mask = 1UL << (root->index_bits - 1);
 203
 204        while (mask) {
 205                get_index(root, cur, &r_index, &h_index);
 206
 207                if (r_index == radix_index && h_index == heap_index)
 208                        return cur;
 209
 210                if (h_index < heap_index ||
 211                    (h_index == heap_index && r_index > radix_index)) {
 212                        struct prio_tree_node *tmp = node;
 213                        node = prio_tree_replace(root, cur, node);
 214                        cur = tmp;
 215                        /* swap indices */
 216                        index = r_index;
 217                        r_index = radix_index;
 218                        radix_index = index;
 219                        index = h_index;
 220                        h_index = heap_index;
 221                        heap_index = index;
 222                }
 223
 224                if (size_flag)
 225                        index = heap_index - radix_index;
 226                else
 227                        index = radix_index;
 228
 229                if (index & mask) {
 230                        if (prio_tree_right_empty(cur)) {
 231                                INIT_PRIO_TREE_NODE(node);
 232                                cur->right = node;
 233                                node->parent = cur;
 234                                return res;
 235                        } else
 236                                cur = cur->right;
 237                } else {
 238                        if (prio_tree_left_empty(cur)) {
 239                                INIT_PRIO_TREE_NODE(node);
 240                                cur->left = node;
 241                                node->parent = cur;
 242                                return res;
 243                        } else
 244                                cur = cur->left;
 245                }
 246
 247                mask >>= 1;
 248
 249                if (!mask) {
 250                        mask = 1UL << (BITS_PER_LONG - 1);
 251                        size_flag = 1;
 252                }
 253        }
 254        /* Should not reach here */
 255        BUG();
 256        return NULL;
 257}
 258
 259/*
 260 * Remove a prio_tree_node @node from a radix priority search tree @root. The
 261 * algorithm takes O(log n) time where 'log n' is the number of bits required
 262 * to represent the maximum heap_index.
 263 */
 264void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
 265{
 266        struct prio_tree_node *cur;
 267        unsigned long r_index, h_index_right, h_index_left;
 268
 269        cur = node;
 270
 271        while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
 272                if (!prio_tree_left_empty(cur))
 273                        get_index(root, cur->left, &r_index, &h_index_left);
 274                else {
 275                        cur = cur->right;
 276                        continue;
 277                }
 278
 279                if (!prio_tree_right_empty(cur))
 280                        get_index(root, cur->right, &r_index, &h_index_right);
 281                else {
 282                        cur = cur->left;
 283                        continue;
 284                }
 285
 286                /* both h_index_left and h_index_right cannot be 0 */
 287                if (h_index_left >= h_index_right)
 288                        cur = cur->left;
 289                else
 290                        cur = cur->right;
 291        }
 292
 293        if (prio_tree_root(cur)) {
 294                BUG_ON(root->prio_tree_node != cur);
 295                __INIT_PRIO_TREE_ROOT(root, root->raw);
 296                return;
 297        }
 298
 299        if (cur->parent->right == cur)
 300                cur->parent->right = cur->parent;
 301        else
 302                cur->parent->left = cur->parent;
 303
 304        while (cur != node)
 305                cur = prio_tree_replace(root, cur->parent, cur);
 306}
 307
 308/*
 309 * Following functions help to enumerate all prio_tree_nodes in the tree that
 310 * overlap with the input interval X [radix_index, heap_index]. The enumeration
 311 * takes O(log n + m) time where 'log n' is the height of the tree (which is
 312 * proportional to # of bits required to represent the maximum heap_index) and
 313 * 'm' is the number of prio_tree_nodes that overlap the interval X.
 314 */
 315
 316static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
 317                unsigned long *r_index, unsigned long *h_index)
 318{
 319        if (prio_tree_left_empty(iter->cur))
 320                return NULL;
 321
 322        get_index(iter->root, iter->cur->left, r_index, h_index);
 323
 324        if (iter->r_index <= *h_index) {
 325                iter->cur = iter->cur->left;
 326                iter->mask >>= 1;
 327                if (iter->mask) {
 328                        if (iter->size_level)
 329                                iter->size_level++;
 330                } else {
 331                        if (iter->size_level) {
 332                                BUG_ON(!prio_tree_left_empty(iter->cur));
 333                                BUG_ON(!prio_tree_right_empty(iter->cur));
 334                                iter->size_level++;
 335                                iter->mask = ULONG_MAX;
 336                        } else {
 337                                iter->size_level = 1;
 338                                iter->mask = 1UL << (BITS_PER_LONG - 1);
 339                        }
 340                }
 341                return iter->cur;
 342        }
 343
 344        return NULL;
 345}
 346
 347static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
 348                unsigned long *r_index, unsigned long *h_index)
 349{
 350        unsigned long value;
 351
 352        if (prio_tree_right_empty(iter->cur))
 353                return NULL;
 354
 355        if (iter->size_level)
 356                value = iter->value;
 357        else
 358                value = iter->value | iter->mask;
 359
 360        if (iter->h_index < value)
 361                return NULL;
 362
 363        get_index(iter->root, iter->cur->right, r_index, h_index);
 364
 365        if (iter->r_index <= *h_index) {
 366                iter->cur = iter->cur->right;
 367                iter->mask >>= 1;
 368                iter->value = value;
 369                if (iter->mask) {
 370                        if (iter->size_level)
 371                                iter->size_level++;
 372                } else {
 373                        if (iter->size_level) {
 374                                BUG_ON(!prio_tree_left_empty(iter->cur));
 375                                BUG_ON(!prio_tree_right_empty(iter->cur));
 376                                iter->size_level++;
 377                                iter->mask = ULONG_MAX;
 378                        } else {
 379                                iter->size_level = 1;
 380                                iter->mask = 1UL << (BITS_PER_LONG - 1);
 381                        }
 382                }
 383                return iter->cur;
 384        }
 385
 386        return NULL;
 387}
 388
 389static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
 390{
 391        iter->cur = iter->cur->parent;
 392        if (iter->mask == ULONG_MAX)
 393                iter->mask = 1UL;
 394        else if (iter->size_level == 1)
 395                iter->mask = 1UL;
 396        else
 397                iter->mask <<= 1;
 398        if (iter->size_level)
 399                iter->size_level--;
 400        if (!iter->size_level && (iter->value & iter->mask))
 401                iter->value ^= iter->mask;
 402        return iter->cur;
 403}
 404
 405static inline int overlap(struct prio_tree_iter *iter,
 406                unsigned long r_index, unsigned long h_index)
 407{
 408        return iter->h_index >= r_index && iter->r_index <= h_index;
 409}
 410
 411/*
 412 * prio_tree_first:
 413 *
 414 * Get the first prio_tree_node that overlaps with the interval [radix_index,
 415 * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
 416 * traversal of the tree.
 417 */
 418static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
 419{
 420        struct prio_tree_root *root;
 421        unsigned long r_index, h_index;
 422
 423        INIT_PRIO_TREE_ITER(iter);
 424
 425        root = iter->root;
 426        if (prio_tree_empty(root))
 427                return NULL;
 428
 429        get_index(root, root->prio_tree_node, &r_index, &h_index);
 430
 431        if (iter->r_index > h_index)
 432                return NULL;
 433
 434        iter->mask = 1UL << (root->index_bits - 1);
 435        iter->cur = root->prio_tree_node;
 436
 437        while (1) {
 438                if (overlap(iter, r_index, h_index))
 439                        return iter->cur;
 440
 441                if (prio_tree_left(iter, &r_index, &h_index))
 442                        continue;
 443
 444                if (prio_tree_right(iter, &r_index, &h_index))
 445                        continue;
 446
 447                break;
 448        }
 449        return NULL;
 450}
 451
 452/*
 453 * prio_tree_next:
 454 *
 455 * Get the next prio_tree_node that overlaps with the input interval in iter
 456 */
 457struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
 458{
 459        unsigned long r_index, h_index;
 460
 461        if (iter->cur == NULL)
 462                return prio_tree_first(iter);
 463
 464repeat:
 465        while (prio_tree_left(iter, &r_index, &h_index))
 466                if (overlap(iter, r_index, h_index))
 467                        return iter->cur;
 468
 469        while (!prio_tree_right(iter, &r_index, &h_index)) {
 470                while (!prio_tree_root(iter->cur) &&
 471                                iter->cur->parent->right == iter->cur)
 472                        prio_tree_parent(iter);
 473
 474                if (prio_tree_root(iter->cur))
 475                        return NULL;
 476
 477                prio_tree_parent(iter);
 478        }
 479
 480        if (overlap(iter, r_index, h_index))
 481                return iter->cur;
 482
 483        goto repeat;
 484}
 485