qemu/util/interval-tree.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2
   3#include "qemu/osdep.h"
   4#include "qemu/interval-tree.h"
   5#include "qemu/atomic.h"
   6
   7/*
   8 * Red Black Trees.
   9 *
  10 * For now, don't expose Linux Red-Black Trees separately, but retain the
  11 * separate type definitions to keep the implementation sane, and allow
  12 * the possibility of separating them later.
  13 *
  14 * Derived from include/linux/rbtree_augmented.h and its dependencies.
  15 */
  16
  17/*
  18 * red-black trees properties:  https://en.wikipedia.org/wiki/Rbtree
  19 *
  20 *  1) A node is either red or black
  21 *  2) The root is black
  22 *  3) All leaves (NULL) are black
  23 *  4) Both children of every red node are black
  24 *  5) Every simple path from root to leaves contains the same number
  25 *     of black nodes.
  26 *
  27 *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
  28 *  consecutive red nodes in a path and every red node is therefore followed by
  29 *  a black. So if B is the number of black nodes on every simple path (as per
  30 *  5), then the longest possible path due to 4 is 2B.
  31 *
  32 *  We shall indicate color with case, where black nodes are uppercase and red
  33 *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
  34 *  parentheses and have some accompanying text comment.
  35 *
  36 * Notes on lockless lookups:
  37 *
  38 * All stores to the tree structure (rb_left and rb_right) must be done using
  39 * WRITE_ONCE [qatomic_set for QEMU]. And we must not inadvertently cause
  40 * (temporary) loops in the tree structure as seen in program order.
  41 *
  42 * These two requirements will allow lockless iteration of the tree -- not
  43 * correct iteration mind you, tree rotations are not atomic so a lookup might
  44 * miss entire subtrees.
  45 *
  46 * But they do guarantee that any such traversal will only see valid elements
  47 * and that it will indeed complete -- does not get stuck in a loop.
  48 *
  49 * It also guarantees that if the lookup returns an element it is the 'correct'
  50 * one. But not returning an element does _NOT_ mean it's not present.
  51 *
  52 * NOTE:
  53 *
  54 * Stores to __rb_parent_color are not important for simple lookups so those
  55 * are left undone as of now. Nor did I check for loops involving parent
  56 * pointers.
  57 */
  58
  59typedef enum RBColor
  60{
  61    RB_RED,
  62    RB_BLACK,
  63} RBColor;
  64
  65typedef struct RBAugmentCallbacks {
  66    void (*propagate)(RBNode *node, RBNode *stop);
  67    void (*copy)(RBNode *old, RBNode *new);
  68    void (*rotate)(RBNode *old, RBNode *new);
  69} RBAugmentCallbacks;
  70
  71static inline RBNode *rb_parent(const RBNode *n)
  72{
  73    return (RBNode *)(n->rb_parent_color & ~1);
  74}
  75
  76static inline RBNode *rb_red_parent(const RBNode *n)
  77{
  78    return (RBNode *)n->rb_parent_color;
  79}
  80
  81static inline RBColor pc_color(uintptr_t pc)
  82{
  83    return (RBColor)(pc & 1);
  84}
  85
  86static inline bool pc_is_red(uintptr_t pc)
  87{
  88    return pc_color(pc) == RB_RED;
  89}
  90
  91static inline bool pc_is_black(uintptr_t pc)
  92{
  93    return !pc_is_red(pc);
  94}
  95
  96static inline RBColor rb_color(const RBNode *n)
  97{
  98    return pc_color(n->rb_parent_color);
  99}
 100
 101static inline bool rb_is_red(const RBNode *n)
 102{
 103    return pc_is_red(n->rb_parent_color);
 104}
 105
 106static inline bool rb_is_black(const RBNode *n)
 107{
 108    return pc_is_black(n->rb_parent_color);
 109}
 110
 111static inline void rb_set_black(RBNode *n)
 112{
 113    n->rb_parent_color |= RB_BLACK;
 114}
 115
 116static inline void rb_set_parent_color(RBNode *n, RBNode *p, RBColor color)
 117{
 118    n->rb_parent_color = (uintptr_t)p | color;
 119}
 120
 121static inline void rb_set_parent(RBNode *n, RBNode *p)
 122{
 123    rb_set_parent_color(n, p, rb_color(n));
 124}
 125
 126static inline void rb_link_node(RBNode *node, RBNode *parent, RBNode **rb_link)
 127{
 128    node->rb_parent_color = (uintptr_t)parent;
 129    node->rb_left = node->rb_right = NULL;
 130
 131    qatomic_set(rb_link, node);
 132}
 133
 134static RBNode *rb_next(RBNode *node)
 135{
 136    RBNode *parent;
 137
 138    /* OMIT: if empty node, return null. */
 139
 140    /*
 141     * If we have a right-hand child, go down and then left as far as we can.
 142     */
 143    if (node->rb_right) {
 144        node = node->rb_right;
 145        while (node->rb_left) {
 146            node = node->rb_left;
 147        }
 148        return node;
 149    }
 150
 151    /*
 152     * No right-hand children. Everything down and left is smaller than us,
 153     * so any 'next' node must be in the general direction of our parent.
 154     * Go up the tree; any time the ancestor is a right-hand child of its
 155     * parent, keep going up. First time it's a left-hand child of its
 156     * parent, said parent is our 'next' node.
 157     */
 158    while ((parent = rb_parent(node)) && node == parent->rb_right) {
 159        node = parent;
 160    }
 161
 162    return parent;
 163}
 164
 165static inline void rb_change_child(RBNode *old, RBNode *new,
 166                                   RBNode *parent, RBRoot *root)
 167{
 168    if (!parent) {
 169        qatomic_set(&root->rb_node, new);
 170    } else if (parent->rb_left == old) {
 171        qatomic_set(&parent->rb_left, new);
 172    } else {
 173        qatomic_set(&parent->rb_right, new);
 174    }
 175}
 176
 177static inline void rb_rotate_set_parents(RBNode *old, RBNode *new,
 178                                         RBRoot *root, RBColor color)
 179{
 180    RBNode *parent = rb_parent(old);
 181
 182    new->rb_parent_color = old->rb_parent_color;
 183    rb_set_parent_color(old, new, color);
 184    rb_change_child(old, new, parent, root);
 185}
 186
 187static void rb_insert_augmented(RBNode *node, RBRoot *root,
 188                                const RBAugmentCallbacks *augment)
 189{
 190    RBNode *parent = rb_red_parent(node), *gparent, *tmp;
 191
 192    while (true) {
 193        /*
 194         * Loop invariant: node is red.
 195         */
 196        if (unlikely(!parent)) {
 197            /*
 198             * The inserted node is root. Either this is the first node, or
 199             * we recursed at Case 1 below and are no longer violating 4).
 200             */
 201            rb_set_parent_color(node, NULL, RB_BLACK);
 202            break;
 203        }
 204
 205        /*
 206         * If there is a black parent, we are done.  Otherwise, take some
 207         * corrective action as, per 4), we don't want a red root or two
 208         * consecutive red nodes.
 209         */
 210        if (rb_is_black(parent)) {
 211            break;
 212        }
 213
 214        gparent = rb_red_parent(parent);
 215
 216        tmp = gparent->rb_right;
 217        if (parent != tmp) {    /* parent == gparent->rb_left */
 218            if (tmp && rb_is_red(tmp)) {
 219                /*
 220                 * Case 1 - node's uncle is red (color flips).
 221                 *
 222                 *       G            g
 223                 *      / \          / \
 224                 *     p   u  -->   P   U
 225                 *    /            /
 226                 *   n            n
 227                 *
 228                 * However, since g's parent might be red, and 4) does not
 229                 * allow this, we need to recurse at g.
 230                 */
 231                rb_set_parent_color(tmp, gparent, RB_BLACK);
 232                rb_set_parent_color(parent, gparent, RB_BLACK);
 233                node = gparent;
 234                parent = rb_parent(node);
 235                rb_set_parent_color(node, parent, RB_RED);
 236                continue;
 237            }
 238
 239            tmp = parent->rb_right;
 240            if (node == tmp) {
 241                /*
 242                 * Case 2 - node's uncle is black and node is
 243                 * the parent's right child (left rotate at parent).
 244                 *
 245                 *      G             G
 246                 *     / \           / \
 247                 *    p   U  -->    n   U
 248                 *     \           /
 249                 *      n         p
 250                 *
 251                 * This still leaves us in violation of 4), the
 252                 * continuation into Case 3 will fix that.
 253                 */
 254                tmp = node->rb_left;
 255                qatomic_set(&parent->rb_right, tmp);
 256                qatomic_set(&node->rb_left, parent);
 257                if (tmp) {
 258                    rb_set_parent_color(tmp, parent, RB_BLACK);
 259                }
 260                rb_set_parent_color(parent, node, RB_RED);
 261                augment->rotate(parent, node);
 262                parent = node;
 263                tmp = node->rb_right;
 264            }
 265
 266            /*
 267             * Case 3 - node's uncle is black and node is
 268             * the parent's left child (right rotate at gparent).
 269             *
 270             *        G           P
 271             *       / \         / \
 272             *      p   U  -->  n   g
 273             *     /                 \
 274             *    n                   U
 275             */
 276            qatomic_set(&gparent->rb_left, tmp); /* == parent->rb_right */
 277            qatomic_set(&parent->rb_right, gparent);
 278            if (tmp) {
 279                rb_set_parent_color(tmp, gparent, RB_BLACK);
 280            }
 281            rb_rotate_set_parents(gparent, parent, root, RB_RED);
 282            augment->rotate(gparent, parent);
 283            break;
 284        } else {
 285            tmp = gparent->rb_left;
 286            if (tmp && rb_is_red(tmp)) {
 287                /* Case 1 - color flips */
 288                rb_set_parent_color(tmp, gparent, RB_BLACK);
 289                rb_set_parent_color(parent, gparent, RB_BLACK);
 290                node = gparent;
 291                parent = rb_parent(node);
 292                rb_set_parent_color(node, parent, RB_RED);
 293                continue;
 294            }
 295
 296            tmp = parent->rb_left;
 297            if (node == tmp) {
 298                /* Case 2 - right rotate at parent */
 299                tmp = node->rb_right;
 300                qatomic_set(&parent->rb_left, tmp);
 301                qatomic_set(&node->rb_right, parent);
 302                if (tmp) {
 303                    rb_set_parent_color(tmp, parent, RB_BLACK);
 304                }
 305                rb_set_parent_color(parent, node, RB_RED);
 306                augment->rotate(parent, node);
 307                parent = node;
 308                tmp = node->rb_left;
 309            }
 310
 311            /* Case 3 - left rotate at gparent */
 312            qatomic_set(&gparent->rb_right, tmp); /* == parent->rb_left */
 313            qatomic_set(&parent->rb_left, gparent);
 314            if (tmp) {
 315                rb_set_parent_color(tmp, gparent, RB_BLACK);
 316            }
 317            rb_rotate_set_parents(gparent, parent, root, RB_RED);
 318            augment->rotate(gparent, parent);
 319            break;
 320        }
 321    }
 322}
 323
 324static void rb_insert_augmented_cached(RBNode *node,
 325                                       RBRootLeftCached *root, bool newleft,
 326                                       const RBAugmentCallbacks *augment)
 327{
 328    if (newleft) {
 329        root->rb_leftmost = node;
 330    }
 331    rb_insert_augmented(node, &root->rb_root, augment);
 332}
 333
 334static void rb_erase_color(RBNode *parent, RBRoot *root,
 335                           const RBAugmentCallbacks *augment)
 336{
 337    RBNode *node = NULL, *sibling, *tmp1, *tmp2;
 338
 339    while (true) {
 340        /*
 341         * Loop invariants:
 342         * - node is black (or NULL on first iteration)
 343         * - node is not the root (parent is not NULL)
 344         * - All leaf paths going through parent and node have a
 345         *   black node count that is 1 lower than other leaf paths.
 346         */
 347        sibling = parent->rb_right;
 348        if (node != sibling) {  /* node == parent->rb_left */
 349            if (rb_is_red(sibling)) {
 350                /*
 351                 * Case 1 - left rotate at parent
 352                 *
 353                 *     P               S
 354                 *    / \             / \ 
 355                 *   N   s    -->    p   Sr
 356                 *      / \         / \ 
 357                 *     Sl  Sr      N   Sl
 358                 */
 359                tmp1 = sibling->rb_left;
 360                qatomic_set(&parent->rb_right, tmp1);
 361                qatomic_set(&sibling->rb_left, parent);
 362                rb_set_parent_color(tmp1, parent, RB_BLACK);
 363                rb_rotate_set_parents(parent, sibling, root, RB_RED);
 364                augment->rotate(parent, sibling);
 365                sibling = tmp1;
 366            }
 367            tmp1 = sibling->rb_right;
 368            if (!tmp1 || rb_is_black(tmp1)) {
 369                tmp2 = sibling->rb_left;
 370                if (!tmp2 || rb_is_black(tmp2)) {
 371                    /*
 372                     * Case 2 - sibling color flip
 373                     * (p could be either color here)
 374                     *
 375                     *    (p)           (p)
 376                     *    / \           / \ 
 377                     *   N   S    -->  N   s
 378                     *      / \           / \ 
 379                     *     Sl  Sr        Sl  Sr
 380                     *
 381                     * This leaves us violating 5) which
 382                     * can be fixed by flipping p to black
 383                     * if it was red, or by recursing at p.
 384                     * p is red when coming from Case 1.
 385                     */
 386                    rb_set_parent_color(sibling, parent, RB_RED);
 387                    if (rb_is_red(parent)) {
 388                        rb_set_black(parent);
 389                    } else {
 390                        node = parent;
 391                        parent = rb_parent(node);
 392                        if (parent) {
 393                            continue;
 394                        }
 395                    }
 396                    break;
 397                }
 398                /*
 399                 * Case 3 - right rotate at sibling
 400                 * (p could be either color here)
 401                 *
 402                 *   (p)           (p)
 403                 *   / \           / \
 404                 *  N   S    -->  N   sl
 405                 *     / \             \
 406                 *    sl  Sr            S
 407                 *                       \
 408                 *                        Sr
 409                 *
 410                 * Note: p might be red, and then bot
 411                 * p and sl are red after rotation (which
 412                 * breaks property 4). This is fixed in
 413                 * Case 4 (in rb_rotate_set_parents()
 414                 *         which set sl the color of p
 415                 *         and set p RB_BLACK)
 416                 *
 417                 *   (p)            (sl)
 418                 *   / \            /  \
 419                 *  N   sl   -->   P    S
 420                 *       \        /      \
 421                 *        S      N        Sr
 422                 *         \
 423                 *          Sr
 424                 */
 425                tmp1 = tmp2->rb_right;
 426                qatomic_set(&sibling->rb_left, tmp1);
 427                qatomic_set(&tmp2->rb_right, sibling);
 428                qatomic_set(&parent->rb_right, tmp2);
 429                if (tmp1) {
 430                    rb_set_parent_color(tmp1, sibling, RB_BLACK);
 431                }
 432                augment->rotate(sibling, tmp2);
 433                tmp1 = sibling;
 434                sibling = tmp2;
 435            }
 436            /*
 437             * Case 4 - left rotate at parent + color flips
 438             * (p and sl could be either color here.
 439             *  After rotation, p becomes black, s acquires
 440             *  p's color, and sl keeps its color)
 441             *
 442             *      (p)             (s)
 443             *      / \             / \
 444             *     N   S     -->   P   Sr
 445             *        / \         / \
 446             *      (sl) sr      N  (sl)
 447             */
 448            tmp2 = sibling->rb_left;
 449            qatomic_set(&parent->rb_right, tmp2);
 450            qatomic_set(&sibling->rb_left, parent);
 451            rb_set_parent_color(tmp1, sibling, RB_BLACK);
 452            if (tmp2) {
 453                rb_set_parent(tmp2, parent);
 454            }
 455            rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
 456            augment->rotate(parent, sibling);
 457            break;
 458        } else {
 459            sibling = parent->rb_left;
 460            if (rb_is_red(sibling)) {
 461                /* Case 1 - right rotate at parent */
 462                tmp1 = sibling->rb_right;
 463                qatomic_set(&parent->rb_left, tmp1);
 464                qatomic_set(&sibling->rb_right, parent);
 465                rb_set_parent_color(tmp1, parent, RB_BLACK);
 466                rb_rotate_set_parents(parent, sibling, root, RB_RED);
 467                augment->rotate(parent, sibling);
 468                sibling = tmp1;
 469            }
 470            tmp1 = sibling->rb_left;
 471            if (!tmp1 || rb_is_black(tmp1)) {
 472                tmp2 = sibling->rb_right;
 473                if (!tmp2 || rb_is_black(tmp2)) {
 474                    /* Case 2 - sibling color flip */
 475                    rb_set_parent_color(sibling, parent, RB_RED);
 476                    if (rb_is_red(parent)) {
 477                        rb_set_black(parent);
 478                    } else {
 479                        node = parent;
 480                        parent = rb_parent(node);
 481                        if (parent) {
 482                            continue;
 483                        }
 484                    }
 485                    break;
 486                }
 487                /* Case 3 - left rotate at sibling */
 488                tmp1 = tmp2->rb_left;
 489                qatomic_set(&sibling->rb_right, tmp1);
 490                qatomic_set(&tmp2->rb_left, sibling);
 491                qatomic_set(&parent->rb_left, tmp2);
 492                if (tmp1) {
 493                    rb_set_parent_color(tmp1, sibling, RB_BLACK);
 494                }
 495                augment->rotate(sibling, tmp2);
 496                tmp1 = sibling;
 497                sibling = tmp2;
 498            }
 499            /* Case 4 - right rotate at parent + color flips */
 500            tmp2 = sibling->rb_right;
 501            qatomic_set(&parent->rb_left, tmp2);
 502            qatomic_set(&sibling->rb_right, parent);
 503            rb_set_parent_color(tmp1, sibling, RB_BLACK);
 504            if (tmp2) {
 505                rb_set_parent(tmp2, parent);
 506            }
 507            rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
 508            augment->rotate(parent, sibling);
 509            break;
 510        }
 511    }
 512}
 513
 514static void rb_erase_augmented(RBNode *node, RBRoot *root,
 515                               const RBAugmentCallbacks *augment)
 516{
 517    RBNode *child = node->rb_right;
 518    RBNode *tmp = node->rb_left;
 519    RBNode *parent, *rebalance;
 520    uintptr_t pc;
 521
 522    if (!tmp) {
 523        /*
 524         * Case 1: node to erase has no more than 1 child (easy!)
 525         *
 526         * Note that if there is one child it must be red due to 5)
 527         * and node must be black due to 4). We adjust colors locally
 528         * so as to bypass rb_erase_color() later on.
 529         */
 530        pc = node->rb_parent_color;
 531        parent = rb_parent(node);
 532        rb_change_child(node, child, parent, root);
 533        if (child) {
 534            child->rb_parent_color = pc;
 535            rebalance = NULL;
 536        } else {
 537            rebalance = pc_is_black(pc) ? parent : NULL;
 538        }
 539        tmp = parent;
 540    } else if (!child) {
 541        /* Still case 1, but this time the child is node->rb_left */
 542        pc = node->rb_parent_color;
 543        parent = rb_parent(node);
 544        tmp->rb_parent_color = pc;
 545        rb_change_child(node, tmp, parent, root);
 546        rebalance = NULL;
 547        tmp = parent;
 548    } else {
 549        RBNode *successor = child, *child2;
 550        tmp = child->rb_left;
 551        if (!tmp) {
 552            /*
 553             * Case 2: node's successor is its right child
 554             *
 555             *    (n)          (s)
 556             *    / \          / \
 557             *  (x) (s)  ->  (x) (c)
 558             *        \
 559             *        (c)
 560             */
 561            parent = successor;
 562            child2 = successor->rb_right;
 563
 564            augment->copy(node, successor);
 565        } else {
 566            /*
 567             * Case 3: node's successor is leftmost under
 568             * node's right child subtree
 569             *
 570             *    (n)          (s)
 571             *    / \          / \
 572             *  (x) (y)  ->  (x) (y)
 573             *      /            /
 574             *    (p)          (p)
 575             *    /            /
 576             *  (s)          (c)
 577             *    \
 578             *    (c)
 579             */
 580            do {
 581                parent = successor;
 582                successor = tmp;
 583                tmp = tmp->rb_left;
 584            } while (tmp);
 585            child2 = successor->rb_right;
 586            qatomic_set(&parent->rb_left, child2);
 587            qatomic_set(&successor->rb_right, child);
 588            rb_set_parent(child, successor);
 589
 590            augment->copy(node, successor);
 591            augment->propagate(parent, successor);
 592        }
 593
 594        tmp = node->rb_left;
 595        qatomic_set(&successor->rb_left, tmp);
 596        rb_set_parent(tmp, successor);
 597
 598        pc = node->rb_parent_color;
 599        tmp = rb_parent(node);
 600        rb_change_child(node, successor, tmp, root);
 601
 602        if (child2) {
 603            rb_set_parent_color(child2, parent, RB_BLACK);
 604            rebalance = NULL;
 605        } else {
 606            rebalance = rb_is_black(successor) ? parent : NULL;
 607        }
 608        successor->rb_parent_color = pc;
 609        tmp = successor;
 610    }
 611
 612    augment->propagate(tmp, NULL);
 613
 614    if (rebalance) {
 615        rb_erase_color(rebalance, root, augment);
 616    }
 617}
 618
 619static void rb_erase_augmented_cached(RBNode *node, RBRootLeftCached *root,
 620                                      const RBAugmentCallbacks *augment)
 621{
 622    if (root->rb_leftmost == node) {
 623        root->rb_leftmost = rb_next(node);
 624    }
 625    rb_erase_augmented(node, &root->rb_root, augment);
 626}
 627
 628
 629/*
 630 * Interval trees.
 631 *
 632 * Derived from lib/interval_tree.c and its dependencies,
 633 * especially include/linux/interval_tree_generic.h.
 634 */
 635
 636#define rb_to_itree(N)  container_of(N, IntervalTreeNode, rb)
 637
 638static bool interval_tree_compute_max(IntervalTreeNode *node, bool exit)
 639{
 640    IntervalTreeNode *child;
 641    uint64_t max = node->last;
 642
 643    if (node->rb.rb_left) {
 644        child = rb_to_itree(node->rb.rb_left);
 645        if (child->subtree_last > max) {
 646            max = child->subtree_last;
 647        }
 648    }
 649    if (node->rb.rb_right) {
 650        child = rb_to_itree(node->rb.rb_right);
 651        if (child->subtree_last > max) {
 652            max = child->subtree_last;
 653        }
 654    }
 655    if (exit && node->subtree_last == max) {
 656        return true;
 657    }
 658    node->subtree_last = max;
 659    return false;
 660}
 661
 662static void interval_tree_propagate(RBNode *rb, RBNode *stop)
 663{
 664    while (rb != stop) {
 665        IntervalTreeNode *node = rb_to_itree(rb);
 666        if (interval_tree_compute_max(node, true)) {
 667            break;
 668        }
 669        rb = rb_parent(&node->rb);
 670    }
 671}
 672
 673static void interval_tree_copy(RBNode *rb_old, RBNode *rb_new)
 674{
 675    IntervalTreeNode *old = rb_to_itree(rb_old);
 676    IntervalTreeNode *new = rb_to_itree(rb_new);
 677
 678    new->subtree_last = old->subtree_last;
 679}
 680
 681static void interval_tree_rotate(RBNode *rb_old, RBNode *rb_new)
 682{
 683    IntervalTreeNode *old = rb_to_itree(rb_old);
 684    IntervalTreeNode *new = rb_to_itree(rb_new);
 685
 686    new->subtree_last = old->subtree_last;
 687    interval_tree_compute_max(old, false);
 688}
 689
 690static const RBAugmentCallbacks interval_tree_augment = {
 691    .propagate = interval_tree_propagate,
 692    .copy = interval_tree_copy,
 693    .rotate = interval_tree_rotate,
 694};
 695
 696/* Insert / remove interval nodes from the tree */
 697void interval_tree_insert(IntervalTreeNode *node, IntervalTreeRoot *root)
 698{
 699    RBNode **link = &root->rb_root.rb_node, *rb_parent = NULL;
 700    uint64_t start = node->start, last = node->last;
 701    IntervalTreeNode *parent;
 702    bool leftmost = true;
 703
 704    while (*link) {
 705        rb_parent = *link;
 706        parent = rb_to_itree(rb_parent);
 707
 708        if (parent->subtree_last < last) {
 709            parent->subtree_last = last;
 710        }
 711        if (start < parent->start) {
 712            link = &parent->rb.rb_left;
 713        } else {
 714            link = &parent->rb.rb_right;
 715            leftmost = false;
 716        }
 717    }
 718
 719    node->subtree_last = last;
 720    rb_link_node(&node->rb, rb_parent, link);
 721    rb_insert_augmented_cached(&node->rb, root, leftmost,
 722                               &interval_tree_augment);
 723}
 724
 725void interval_tree_remove(IntervalTreeNode *node, IntervalTreeRoot *root)
 726{
 727    rb_erase_augmented_cached(&node->rb, root, &interval_tree_augment);
 728}
 729
 730/*
 731 * Iterate over intervals intersecting [start;last]
 732 *
 733 * Note that a node's interval intersects [start;last] iff:
 734 *   Cond1: node->start <= last
 735 * and
 736 *   Cond2: start <= node->last
 737 */
 738
 739static IntervalTreeNode *interval_tree_subtree_search(IntervalTreeNode *node,
 740                                                      uint64_t start,
 741                                                      uint64_t last)
 742{
 743    while (true) {
 744        /*
 745         * Loop invariant: start <= node->subtree_last
 746         * (Cond2 is satisfied by one of the subtree nodes)
 747         */
 748        if (node->rb.rb_left) {
 749            IntervalTreeNode *left = rb_to_itree(node->rb.rb_left);
 750
 751            if (start <= left->subtree_last) {
 752                /*
 753                 * Some nodes in left subtree satisfy Cond2.
 754                 * Iterate to find the leftmost such node N.
 755                 * If it also satisfies Cond1, that's the
 756                 * match we are looking for. Otherwise, there
 757                 * is no matching interval as nodes to the
 758                 * right of N can't satisfy Cond1 either.
 759                 */
 760                node = left;
 761                continue;
 762            }
 763        }
 764        if (node->start <= last) {         /* Cond1 */
 765            if (start <= node->last) {     /* Cond2 */
 766                return node; /* node is leftmost match */
 767            }
 768            if (node->rb.rb_right) {
 769                node = rb_to_itree(node->rb.rb_right);
 770                if (start <= node->subtree_last) {
 771                    continue;
 772                }
 773            }
 774        }
 775        return NULL; /* no match */
 776    }
 777}
 778
 779IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root,
 780                                           uint64_t start, uint64_t last)
 781{
 782    IntervalTreeNode *node, *leftmost;
 783
 784    if (!root->rb_root.rb_node) {
 785        return NULL;
 786    }
 787
 788    /*
 789     * Fastpath range intersection/overlap between A: [a0, a1] and
 790     * B: [b0, b1] is given by:
 791     *
 792     *         a0 <= b1 && b0 <= a1
 793     *
 794     *  ... where A holds the lock range and B holds the smallest
 795     * 'start' and largest 'last' in the tree. For the later, we
 796     * rely on the root node, which by augmented interval tree
 797     * property, holds the largest value in its last-in-subtree.
 798     * This allows mitigating some of the tree walk overhead for
 799     * for non-intersecting ranges, maintained and consulted in O(1).
 800     */
 801    node = rb_to_itree(root->rb_root.rb_node);
 802    if (node->subtree_last < start) {
 803        return NULL;
 804    }
 805
 806    leftmost = rb_to_itree(root->rb_leftmost);
 807    if (leftmost->start > last) {
 808        return NULL;
 809    }
 810
 811    return interval_tree_subtree_search(node, start, last);
 812}
 813
 814IntervalTreeNode *interval_tree_iter_next(IntervalTreeNode *node,
 815                                          uint64_t start, uint64_t last)
 816{
 817    RBNode *rb = node->rb.rb_right, *prev;
 818
 819    while (true) {
 820        /*
 821         * Loop invariants:
 822         *   Cond1: node->start <= last
 823         *   rb == node->rb.rb_right
 824         *
 825         * First, search right subtree if suitable
 826         */
 827        if (rb) {
 828            IntervalTreeNode *right = rb_to_itree(rb);
 829
 830            if (start <= right->subtree_last) {
 831                return interval_tree_subtree_search(right, start, last);
 832            }
 833        }
 834
 835        /* Move up the tree until we come from a node's left child */
 836        do {
 837            rb = rb_parent(&node->rb);
 838            if (!rb) {
 839                return NULL;
 840            }
 841            prev = &node->rb;
 842            node = rb_to_itree(rb);
 843            rb = node->rb.rb_right;
 844        } while (prev == rb);
 845
 846        /* Check if the node intersects [start;last] */
 847        if (last < node->start) {  /* !Cond1 */
 848            return NULL;
 849        }
 850        if (start <= node->last) { /* Cond2 */
 851            return node;
 852        }
 853    }
 854}
 855
 856/* Occasionally useful for calling from within the debugger. */
 857#if 0
 858static void debug_interval_tree_int(IntervalTreeNode *node,
 859                                    const char *dir, int level)
 860{
 861    printf("%4d %*s %s [%" PRIu64 ",%" PRIu64 "] subtree_last:%" PRIu64 "\n",
 862           level, level + 1, dir, rb_is_red(&node->rb) ? "r" : "b",
 863           node->start, node->last, node->subtree_last);
 864
 865    if (node->rb.rb_left) {
 866        debug_interval_tree_int(rb_to_itree(node->rb.rb_left), "<", level + 1);
 867    }
 868    if (node->rb.rb_right) {
 869        debug_interval_tree_int(rb_to_itree(node->rb.rb_right), ">", level + 1);
 870    }
 871}
 872
 873void debug_interval_tree(IntervalTreeNode *node);
 874void debug_interval_tree(IntervalTreeNode *node)
 875{
 876    if (node) {
 877        debug_interval_tree_int(node, "*", 0);
 878    } else {
 879        printf("null\n");
 880    }
 881}
 882#endif
 883