1
2
3
4
5
6
7#include "dm-btree-internal.h"
8#include "dm-space-map.h"
9#include "dm-transaction-manager.h"
10
11#include <linux/export.h>
12#include <linux/device-mapper.h>
13
14#define DM_MSG_PREFIX "btree"
15
16
17
18
/*
 * Copy an in-core value into an on-disk buffer, then drop the sparse
 * "blessed for disk" annotation on @src (the bless/unbless macros are
 * static-analysis aids; they do not affect runtime behaviour).
 */
static void memcpy_disk(void *dest, const void *src, size_t len)
	__dm_written_to_disk(src)
{
	memcpy(dest, src, len);
	__dm_unbless_for_disk(src);
}
25
26static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
27 unsigned index, void *elt)
28 __dm_written_to_disk(elt)
29{
30 if (index < nr_elts)
31 memmove(base + (elt_size * (index + 1)),
32 base + (elt_size * index),
33 (nr_elts - index) * elt_size);
34
35 memcpy_disk(base + (elt_size * index), elt, elt_size);
36}
37
38
39
40
41static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
42{
43 int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
44
45 while (hi - lo > 1) {
46 int mid = lo + ((hi - lo) / 2);
47 uint64_t mid_key = le64_to_cpu(n->keys[mid]);
48
49 if (mid_key == key)
50 return mid;
51
52 if (mid_key < key)
53 lo = mid;
54 else
55 hi = mid;
56 }
57
58 return want_hi ? hi : lo;
59}
60
/*
 * Index of the greatest key <= @key, or -1 if every key in the node is
 * greater than @key.
 */
int lower_bound(struct btree_node *n, uint64_t key)
{
	return bsearch(n, key, 0);
}
65
/*
 * Index of the smallest key >= @key, or nr_entries if every key in the
 * node is smaller than @key.
 */
static int upper_bound(struct btree_node *n, uint64_t key)
{
	return bsearch(n, key, 1);
}
70
71void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
72 struct dm_btree_value_type *vt)
73{
74 unsigned i;
75 uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
76
77 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
78 for (i = 0; i < nr_entries; i++)
79 dm_tm_inc(tm, value64(n, i));
80 else if (vt->inc)
81 for (i = 0; i < nr_entries; i++)
82 vt->inc(vt->context, value_ptr(n, i));
83}
84
85static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
86 uint64_t key, void *value)
87 __dm_written_to_disk(value)
88{
89 uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
90 __le64 key_le = cpu_to_le64(key);
91
92 if (index > nr_entries ||
93 index >= le32_to_cpu(node->header.max_entries)) {
94 DMERR("too many entries in btree node for insert");
95 __dm_unbless_for_disk(value);
96 return -ENOMEM;
97 }
98
99 __dm_bless_for_disk(&key_le);
100
101 array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
102 array_insert(value_base(node), value_size, nr_entries, index, value);
103 node->header.nr_entries = cpu_to_le32(nr_entries + 1);
104
105 return 0;
106}
107
108
109
110
111
112
113
114static uint32_t calc_max_entries(size_t value_size, size_t block_size)
115{
116 uint32_t total, n;
117 size_t elt_size = sizeof(uint64_t) + value_size;
118
119 block_size -= sizeof(struct node_header);
120 total = block_size / elt_size;
121 n = total / 3;
122
123 return 3 * n;
124}
125
126int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
127{
128 int r;
129 struct dm_block *b;
130 struct btree_node *n;
131 size_t block_size;
132 uint32_t max_entries;
133
134 r = new_block(info, &b);
135 if (r < 0)
136 return r;
137
138 block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
139 max_entries = calc_max_entries(info->value_type.size, block_size);
140
141 n = dm_block_data(b);
142 memset(n, 0, block_size);
143 n->header.flags = cpu_to_le32(LEAF_NODE);
144 n->header.nr_entries = cpu_to_le32(0);
145 n->header.max_entries = cpu_to_le32(max_entries);
146 n->header.value_size = cpu_to_le32(info->value_type.size);
147
148 *root = dm_block_location(b);
149 unlock_block(info, b);
150
151 return 0;
152}
153EXPORT_SYMBOL_GPL(dm_btree_empty);
154
155
156
157
158
159
160
/*
 * Deletion walks the tree iteratively using an explicit stack of frames,
 * one frame per node currently being visited.
 */
#define MAX_SPINE_DEPTH 64
struct frame {
	struct dm_block *b;		/* read-locked block for this node */
	struct btree_node *n;		/* in-core data of that block */
	unsigned level;			/* which btree level this node belongs to */
	unsigned nr_children;		/* nr_entries of this node */
	unsigned current_child;		/* next child index to process */
};

struct del_stack {
	struct dm_btree_info *info;
	struct dm_transaction_manager *tm;
	int top;			/* index of top frame, -1 when empty */
	struct frame spine[MAX_SPINE_DEPTH];
};
176
177static int top_frame(struct del_stack *s, struct frame **f)
178{
179 if (s->top < 0) {
180 DMERR("btree deletion stack empty");
181 return -EINVAL;
182 }
183
184 *f = s->spine + s->top;
185
186 return 0;
187}
188
/* True while there are still frames left on the deletion stack. */
static int unprocessed_frames(struct del_stack *s)
{
	return s->top >= 0;
}
193
194static void prefetch_children(struct del_stack *s, struct frame *f)
195{
196 unsigned i;
197 struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
198
199 for (i = 0; i < f->nr_children; i++)
200 dm_bm_prefetch(bm, value64(f->n, i));
201}
202
/*
 * In a multi-level btree, leaves of every level but the last hold roots
 * of subordinate trees; such a frame still has children to recurse into.
 */
static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
{
	return f->level < (info->levels - 1);
}
207
/*
 * Push a new frame for block @b at btree level @level.
 *
 * If the block is shared (ref count > 1) we only drop our reference and
 * do not descend — the children remain reachable via the other owners.
 * Otherwise the block is read-locked, a frame is pushed, and its
 * children are prefetched ready for deletion.
 */
static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
	int r;
	uint32_t ref_count;

	if (s->top >= MAX_SPINE_DEPTH - 1) {
		DMERR("btree deletion stack out of memory");
		return -ENOMEM;
	}

	r = dm_tm_ref(s->tm, b, &ref_count);
	if (r)
		return r;

	if (ref_count > 1)
		/*
		 * This is a shared node, so we can just decrement its
		 * reference counter and leave the children alone.
		 */
		dm_tm_dec(s->tm, b);

	else {
		uint32_t flags;
		struct frame *f = s->spine + ++s->top;

		r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
		if (r) {
			/* undo the push; the frame was never initialised */
			s->top--;
			return r;
		}

		f->n = dm_block_data(f->b);
		f->level = level;
		f->nr_children = le32_to_cpu(f->n->header.nr_entries);
		f->current_child = 0;

		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
			prefetch_children(s, f);
	}

	return 0;
}
251
/*
 * Finished with the top frame's node: drop its reference (freeing it if
 * this was the last one) and release the read lock.
 */
static void pop_frame(struct del_stack *s)
{
	struct frame *f = s->spine + s->top--;

	dm_tm_dec(s->tm, dm_block_location(f->b));
	dm_tm_unlock(s->tm, f->b);
}
259
260static void unlock_all_frames(struct del_stack *s)
261{
262 struct frame *f;
263
264 while (unprocessed_frames(s)) {
265 f = s->spine + s->top--;
266 dm_tm_unlock(s->tm, f->b);
267 }
268}
269
/*
 * dm_btree_del - delete a whole btree, dropping references on all blocks.
 *
 * Iterative depth-first walk using an explicit stack (struct del_stack)
 * rather than recursion.  Shared subtrees are not descended into (see
 * push_frame); their reference counts are merely decremented.
 * GFP_NOIO is used because deletion may run from an I/O completion path
 * — NOTE(review): assumed from the allocation flag, confirm with callers.
 */
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
	int r;
	struct del_stack *s;

	s = kmalloc(sizeof(*s), GFP_NOIO);
	if (!s)
		return -ENOMEM;
	s->info = info;
	s->tm = info->tm;
	s->top = -1;

	r = push_frame(s, root, 0);
	if (r)
		goto out;

	while (unprocessed_frames(s)) {
		uint32_t flags;
		struct frame *f;
		dm_block_t b;

		r = top_frame(s, &f);
		if (r)
			goto out;

		/* all children visited: release this node and move up */
		if (f->current_child >= f->nr_children) {
			pop_frame(s);
			continue;
		}

		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE) {
			/* descend into the next child of this internal node */
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level);
			if (r)
				goto out;

		} else if (is_internal_level(info, f)) {
			/* leaf of a non-final level: values are sub-tree roots */
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level + 1);
			if (r)
				goto out;

		} else {
			/* bottom-level leaf: release the values themselves */
			if (info->value_type.dec) {
				unsigned i;

				for (i = 0; i < f->nr_children; i++)
					info->value_type.dec(info->value_type.context,
							     value_ptr(f->n, i));
			}
			pop_frame(s);
		}
	}
out:
	if (r) {
		/* cleanup all frames of del_stack */
		unlock_all_frames(s);
	}
	kfree(s);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_del);
336
337
338
/*
 * Walk one level of the (possibly multi-level) btree from @block down to
 * a leaf, choosing the child at each node with @search_fn.  On success
 * the matched leaf key is returned in *result_key and value_size bytes
 * of the value are copied to @v.  Returns -ENODATA if the search falls
 * off either end of a node.
 */
static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
			    int (*search_fn)(struct btree_node *, uint64_t),
			    uint64_t *result_key, void *v, size_t value_size)
{
	int i, r;
	uint32_t flags, nr_entries;

	do {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		i = search_fn(ro_node(s), key);

		flags = le32_to_cpu(ro_node(s)->header.flags);
		nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (i < 0 || i >= nr_entries)
			return -ENODATA;

		/* internal node: descend into the selected child */
		if (flags & INTERNAL_NODE)
			block = value64(ro_node(s), i);

	} while (!(flags & LEAF_NODE));

	*result_key = le64_to_cpu(ro_node(s)->keys[i]);
	memcpy(v, value_ptr(ro_node(s), i), value_size);

	return 0;
}
368
369int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
370 uint64_t *keys, void *value_le)
371{
372 unsigned level, last_level = info->levels - 1;
373 int r = -ENODATA;
374 uint64_t rkey;
375 __le64 internal_value_le;
376 struct ro_spine spine;
377
378 init_ro_spine(&spine, info);
379 for (level = 0; level < info->levels; level++) {
380 size_t size;
381 void *value_p;
382
383 if (level == last_level) {
384 value_p = value_le;
385 size = info->value_type.size;
386
387 } else {
388 value_p = &internal_value_le;
389 size = sizeof(uint64_t);
390 }
391
392 r = btree_lookup_raw(&spine, root, keys[level],
393 lower_bound, &rkey,
394 value_p, size);
395
396 if (!r) {
397 if (rkey != keys[level]) {
398 exit_ro_spine(&spine);
399 return -ENODATA;
400 }
401 } else {
402 exit_ro_spine(&spine);
403 return r;
404 }
405
406 root = le64_to_cpu(internal_value_le);
407 }
408 exit_ro_spine(&spine);
409
410 return r;
411}
412EXPORT_SYMBOL_GPL(dm_btree_lookup);
413
/*
 * Find the first entry with a key strictly greater than @key within the
 * single-level tree rooted at @root.  Recurses through internal nodes;
 * if the chosen subtree is exhausted (-ENODATA) the next sibling subtree
 * is tried before giving up.  On success *rkey and value_le are filled.
 */
static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,
				       uint64_t key, uint64_t *rkey, void *value_le)
{
	int r, i;
	uint32_t flags, nr_entries;
	struct dm_block *node;
	struct btree_node *n;

	r = bn_read_lock(info, root, &node);
	if (r)
		return r;

	n = dm_block_data(node);
	flags = le32_to_cpu(n->header.flags);
	nr_entries = le32_to_cpu(n->header.nr_entries);

	if (flags & INTERNAL_NODE) {
		/* best candidate subtree: greatest key <= @key */
		i = lower_bound(n, key);
		if (i < 0 || i >= nr_entries) {
			r = -ENODATA;
			goto out;
		}

		r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
		if (r == -ENODATA && i < (nr_entries - 1)) {
			/* subtree had nothing > key; try the next one */
			i++;
			r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
		}

	} else {
		/* leaf: smallest key >= @key (callers pass key-1, see below) */
		i = upper_bound(n, key);
		if (i < 0 || i >= nr_entries) {
			r = -ENODATA;
			goto out;
		}

		*rkey = le64_to_cpu(n->keys[i]);
		memcpy(value_le, value_ptr(n, i), info->value_type.size);
	}
out:
	dm_tm_unlock(info->tm, node);
	return r;
}
457
/*
 * dm_btree_lookup_next - find the next entry at or after @keys in the
 * bottom level, after exact matches on all upper levels.
 * Returns 0 with *rkey/value_le filled, -ENODATA if nothing follows, or
 * a negative errno.
 */
int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
			 uint64_t *keys, uint64_t *rkey, void *value_le)
{
	unsigned level;
	int r = -ENODATA;
	__le64 internal_value_le;
	struct ro_spine spine;

	init_ro_spine(&spine, info);
	/* upper levels require exact matches, just like dm_btree_lookup */
	for (level = 0; level < info->levels - 1u; level++) {
		r = btree_lookup_raw(&spine, root, keys[level],
				     lower_bound, rkey,
				     &internal_value_le, sizeof(uint64_t));
		if (r)
			goto out;

		if (*rkey != keys[level]) {
			r = -ENODATA;
			goto out;
		}

		root = le64_to_cpu(internal_value_le);
	}

	/* bottom level: first entry with key > keys[level] */
	r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le);
out:
	exit_ro_spine(&spine);
	return r;
}

EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
/*
 * Split a full node by creating a new sibling and moving the upper half
 * of its entries across.  Assumes the spine has a parent node with room
 * for one more child.  On return s->nodes[1] is the half that @key
 * belongs in; the other half is unlocked.
 */
static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
			       uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *parent;
	struct btree_node *ln, *rn, *pn;
	__le64 location;

	left = shadow_current(s);

	r = new_block(s->info, &right);
	if (r < 0)
		return r;

	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
	nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;

	ln->header.nr_entries = cpu_to_le32(nr_left);

	rn->header.flags = ln->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = ln->header.max_entries;
	rn->header.value_size = ln->header.value_size;
	memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));

	/* value width differs between internal nodes and leaves */
	size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
		sizeof(uint64_t) : s->info->value_type.size;
	memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
	       size * nr_right);

	/*
	 * Patch up the parent: re-point the existing entry at the (shadowed)
	 * left node and insert a new entry for the right node.
	 */
	parent = shadow_parent(s);

	pn = dm_block_data(parent);
	location = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&location);
	memcpy_disk(value_ptr(pn, parent_index),
		    &location, sizeof(__le64));

	location = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&location);

	r = insert_at(sizeof(__le64), pn, parent_index + 1,
		      le64_to_cpu(rn->keys[0]), &location);
	if (r) {
		unlock_block(s->info, right);
		return r;
	}

	/* keep only the half the key targets locked on the spine */
	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}

	return 0;
}
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
/*
 * Split a full root node: its entries are copied into two fresh children
 * and the root itself is rewritten as an internal node with exactly two
 * entries pointing at them.  Keeping the root block in place means the
 * tree's root location never changes during a split.
 */
static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *new_parent;
	struct btree_node *pn, *ln, *rn;
	__le64 val;

	new_parent = shadow_current(s);

	r = new_block(s->info, &left);
	if (r < 0)
		return r;

	r = new_block(s->info, &right);
	if (r < 0) {
		unlock_block(s->info, left);
		return r;
	}

	pn = dm_block_data(new_parent);
	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
	nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;

	/* the children inherit the old root's node type and geometry */
	ln->header.flags = pn->header.flags;
	ln->header.nr_entries = cpu_to_le32(nr_left);
	ln->header.max_entries = pn->header.max_entries;
	ln->header.value_size = pn->header.value_size;

	rn->header.flags = pn->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = pn->header.max_entries;
	rn->header.value_size = pn->header.value_size;

	memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
	memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));

	size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
		sizeof(__le64) : s->info->value_type.size;
	memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
	memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
	       nr_right * size);

	/* rewrite the root as a 2-entry internal node over left/right */
	pn->header.flags = cpu_to_le32(INTERNAL_NODE);
	pn->header.nr_entries = cpu_to_le32(2);
	pn->header.max_entries = cpu_to_le32(
		calc_max_entries(sizeof(__le64),
				 dm_bm_block_size(
					 dm_tm_get_bm(s->info->tm))));
	pn->header.value_size = cpu_to_le32(sizeof(__le64));

	val = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&val);
	pn->keys[0] = ln->keys[0];
	memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));

	val = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&val);
	pn->keys[1] = rn->keys[0];
	memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));

	/*
	 * Rejig the spine: the root becomes the parent slot, the half
	 * containing @key becomes the current node.
	 */
	if (s->nodes[0] != new_parent) {
		unlock_block(s->info, s->nodes[0]);
		s->nodes[0] = new_parent;
	}
	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}
	s->count = 2;

	return 0;
}
693
/*
 * Walk (and shadow) one btree level down to the leaf where @key belongs,
 * splitting any full node encountered on the way.  On return *index is
 * the slot within the current (leaf) node where @key either already
 * lives or should be inserted.
 */
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned *index)
{
	int r, i = *index, top = 1;
	struct btree_node *node;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * Shadowing may have relocated the current block, so patch
		 * up the parent's pointer to it.
		 */
		if (shadow_has_parent(s) && i >= 0) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		node = dm_block_data(shadow_current(s));

		/* raw le32 equality compare is endian-safe for equality */
		if (node->header.nr_entries == node->header.max_entries) {
			if (top)
				r = btree_split_beneath(s, key);
			else
				r = btree_split_sibling(s, i, key);

			if (r < 0)
				return r;
		}

		/* splitting may have swapped in a different current node */
		node = dm_block_data(shadow_current(s));

		i = lower_bound(node, key);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		if (i < 0) {
			/* @key is below this node's range: widen the bounds */
			node->keys[0] = cpu_to_le64(key);
			i = 0;
		}

		root = value64(node, i);
		top = 0;
	}

	/* no exact match: insertion point is one past the lower bound */
	if (i < 0 || le64_to_cpu(node->keys[i]) != key)
		i++;

	*index = i;
	return 0;
}
756
/*
 * Common implementation behind dm_btree_insert{,_notify}().
 *
 * For each upper level an entry must exist (creating an empty sub-tree
 * if not); at the bottom level @value is either inserted or overwrites
 * the existing value.  @value arrives blessed for disk and is consumed;
 * on the "bad" error path it is explicitly unblessed.
 *
 * *inserted (if non-NULL) is set to 1 for a fresh insert, 0 for an
 * overwrite.  *new_root receives the (possibly relocated) root.
 */
static int insert(struct dm_btree_info *info, dm_block_t root,
		  uint64_t *keys, void *value, dm_block_t *new_root,
		  int *inserted)
	__dm_written_to_disk(value)
{
	int r, need_insert;
	unsigned level, index = -1, last_level = info->levels - 1;
	dm_block_t block = root;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_type;

	init_le64_type(info->tm, &le64_type);
	init_shadow_spine(&spine, info);

	/* upper levels store __le64 roots of subordinate trees */
	for (level = 0; level < (info->levels - 1); level++) {
		r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
		if (r < 0)
			goto bad;

		n = dm_block_data(shadow_current(&spine));
		need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
			       (le64_to_cpu(n->keys[index]) != keys[level]));

		if (need_insert) {
			/* no sub-tree for this key yet: create an empty one */
			dm_block_t new_tree;
			__le64 new_le;

			r = dm_btree_empty(info, &new_tree);
			if (r < 0)
				goto bad;

			new_le = cpu_to_le64(new_tree);
			__dm_bless_for_disk(&new_le);

			r = insert_at(sizeof(uint64_t), n, index,
				      keys[level], &new_le);
			if (r)
				goto bad;
		}

		if (level < last_level)
			block = value64(n, index);
	}

	/* bottom level holds the caller's values */
	r = btree_insert_raw(&spine, block, &info->value_type,
			     keys[level], &index);
	if (r < 0)
		goto bad;

	n = dm_block_data(shadow_current(&spine));
	need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
		       (le64_to_cpu(n->keys[index]) != keys[level]));

	if (need_insert) {
		if (inserted)
			*inserted = 1;

		/* insert_at consumes (unblesses) value even on failure */
		r = insert_at(info->value_type.size, n, index,
			      keys[level], value);
		if (r)
			goto bad_unblessed;
	} else {
		if (inserted)
			*inserted = 0;

		/* overwriting: drop the old value's reference first, unless
		 * it compares equal to the new one */
		if (info->value_type.dec &&
		    (!info->value_type.equal ||
		     !info->value_type.equal(
			     info->value_type.context,
			     value_ptr(n, index),
			     value))) {
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index));
		}
		memcpy_disk(value_ptr(n, index),
			    value, info->value_type.size);
	}

	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return 0;

bad:
	__dm_unbless_for_disk(value);
bad_unblessed:
	exit_shadow_spine(&spine);
	return r;
}
847
/*
 * dm_btree_insert - insert @value at @keys (one key per level);
 * overwrites silently if the key already exists.
 */
int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, void *value, dm_block_t *new_root)
	__dm_written_to_disk(value)
{
	return insert(info, root, keys, value, new_root, NULL);
}
EXPORT_SYMBOL_GPL(dm_btree_insert);
855
/*
 * dm_btree_insert_notify - as dm_btree_insert(), but also reports via
 * *inserted whether a new entry was created (1) or overwritten (0).
 */
int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
			   uint64_t *keys, void *value, dm_block_t *new_root,
			   int *inserted)
	__dm_written_to_disk(value)
{
	return insert(info, root, keys, value, new_root, inserted);
}
EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
864
865
866
867static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
868 uint64_t *result_key, dm_block_t *next_block)
869{
870 int i, r;
871 uint32_t flags;
872
873 do {
874 r = ro_step(s, block);
875 if (r < 0)
876 return r;
877
878 flags = le32_to_cpu(ro_node(s)->header.flags);
879 i = le32_to_cpu(ro_node(s)->header.nr_entries);
880 if (!i)
881 return -ENODATA;
882 else
883 i--;
884
885 if (find_highest)
886 *result_key = le64_to_cpu(ro_node(s)->keys[i]);
887 else
888 *result_key = le64_to_cpu(ro_node(s)->keys[0]);
889
890 if (next_block || flags & INTERNAL_NODE)
891 block = value64(ro_node(s), i);
892
893 } while (flags & INTERNAL_NODE);
894
895 if (next_block)
896 *next_block = block;
897 return 0;
898}
899
900static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
901 bool find_highest, uint64_t *result_keys)
902{
903 int r = 0, count = 0, level;
904 struct ro_spine spine;
905
906 init_ro_spine(&spine, info);
907 for (level = 0; level < info->levels; level++) {
908 r = find_key(&spine, root, find_highest, result_keys + level,
909 level == info->levels - 1 ? NULL : &root);
910 if (r == -ENODATA) {
911 r = 0;
912 break;
913
914 } else if (r)
915 break;
916
917 count++;
918 }
919 exit_ro_spine(&spine);
920
921 return r ? r : count;
922}
923
/*
 * dm_btree_find_highest_key - highest key at each level; returns the
 * number of levels filled in, or a negative errno.
 */
int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
			      uint64_t *result_keys)
{
	return dm_btree_find_key(info, root, true, result_keys);
}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
930
/*
 * dm_btree_find_lowest_key - lowest key at each level; returns the
 * number of levels filled in, or a negative errno.
 */
int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
			     uint64_t *result_keys)
{
	return dm_btree_find_key(info, root, false, result_keys);
}
EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
937
938
939
940
941
942
943
944static int walk_node(struct dm_btree_info *info, dm_block_t block,
945 int (*fn)(void *context, uint64_t *keys, void *leaf),
946 void *context)
947{
948 int r;
949 unsigned i, nr;
950 struct dm_block *node;
951 struct btree_node *n;
952 uint64_t keys;
953
954 r = bn_read_lock(info, block, &node);
955 if (r)
956 return r;
957
958 n = dm_block_data(node);
959
960 nr = le32_to_cpu(n->header.nr_entries);
961 for (i = 0; i < nr; i++) {
962 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
963 r = walk_node(info, value64(n, i), fn, context);
964 if (r)
965 goto out;
966 } else {
967 keys = le64_to_cpu(*key_ptr(n, i));
968 r = fn(context, &keys, value_ptr(n, i));
969 if (r)
970 goto out;
971 }
972 }
973
974out:
975 dm_tm_unlock(info->tm, node);
976 return r;
977}
978
/*
 * dm_btree_walk - visit every leaf entry of a single-level btree.
 * Multi-level trees are not supported (BUG if info->levels > 1).
 */
int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
		  int (*fn)(void *context, uint64_t *keys, void *leaf),
		  void *context)
{
	BUG_ON(info->levels > 1);
	return walk_node(info, root, fn, context);
}
EXPORT_SYMBOL_GPL(dm_btree_walk);
987