1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/slab.h>
8#include <linux/rbtree.h>
9#include <linux/mm.h>
10#include "ctree.h"
11#include "disk-io.h"
12#include "transaction.h"
13#include "print-tree.h"
14#include "locking.h"
15#include "volumes.h"
16#include "qgroup.h"
17
18static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
19 *root, struct btrfs_path *path, int level);
20static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
21 const struct btrfs_key *ins_key, struct btrfs_path *path,
22 int data_size, int extend);
23static int push_node_left(struct btrfs_trans_handle *trans,
24 struct extent_buffer *dst,
25 struct extent_buffer *src, int empty);
26static int balance_node_right(struct btrfs_trans_handle *trans,
27 struct extent_buffer *dst_buf,
28 struct extent_buffer *src_buf);
29static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
30 int level, int slot);
31
32static const struct btrfs_csums {
33 u16 size;
34 const char name[10];
35 const char driver[12];
36} btrfs_csums[] = {
37 [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
38 [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
39 [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
40 [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
41 .driver = "blake2b-256" },
42};
43
44int btrfs_super_csum_size(const struct btrfs_super_block *s)
45{
46 u16 t = btrfs_super_csum_type(s);
47
48
49
50 return btrfs_csums[t].size;
51}
52
53const char *btrfs_super_csum_name(u16 csum_type)
54{
55
56 return btrfs_csums[csum_type].name;
57}
58
59
60
61
62
63const char *btrfs_super_csum_driver(u16 csum_type)
64{
65
66 return btrfs_csums[csum_type].driver[0] ?
67 btrfs_csums[csum_type].driver :
68 btrfs_csums[csum_type].name;
69}
70
71size_t __attribute_const__ btrfs_get_num_csums(void)
72{
73 return ARRAY_SIZE(btrfs_csums);
74}
75
76struct btrfs_path *btrfs_alloc_path(void)
77{
78 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
79}
80
81
82void btrfs_free_path(struct btrfs_path *p)
83{
84 if (!p)
85 return;
86 btrfs_release_path(p);
87 kmem_cache_free(btrfs_path_cachep, p);
88}
89
90
91
92
93
94
95
96noinline void btrfs_release_path(struct btrfs_path *p)
97{
98 int i;
99
100 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
101 p->slots[i] = 0;
102 if (!p->nodes[i])
103 continue;
104 if (p->locks[i]) {
105 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
106 p->locks[i] = 0;
107 }
108 free_extent_buffer(p->nodes[i]);
109 p->nodes[i] = NULL;
110 }
111}
112
113
114
115
116
117
118
119
120
121
122
123struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
124{
125 struct extent_buffer *eb;
126
127 while (1) {
128 rcu_read_lock();
129 eb = rcu_dereference(root->node);
130
131
132
133
134
135
136
137 if (atomic_inc_not_zero(&eb->refs)) {
138 rcu_read_unlock();
139 break;
140 }
141 rcu_read_unlock();
142 synchronize_rcu();
143 }
144 return eb;
145}
146
147
148
149
150
151
152static void add_root_to_dirty_list(struct btrfs_root *root)
153{
154 struct btrfs_fs_info *fs_info = root->fs_info;
155
156 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
157 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
158 return;
159
160 spin_lock(&fs_info->trans_lock);
161 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
162
163 if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
164 list_move_tail(&root->dirty_list,
165 &fs_info->dirty_cowonly_roots);
166 else
167 list_move(&root->dirty_list,
168 &fs_info->dirty_cowonly_roots);
169 }
170 spin_unlock(&fs_info->trans_lock);
171}
172
173
174
175
176
177
178int btrfs_copy_root(struct btrfs_trans_handle *trans,
179 struct btrfs_root *root,
180 struct extent_buffer *buf,
181 struct extent_buffer **cow_ret, u64 new_root_objectid)
182{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 struct extent_buffer *cow;
185 int ret = 0;
186 int level;
187 struct btrfs_disk_key disk_key;
188
189 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
190 trans->transid != fs_info->running_transaction->transid);
191 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
192 trans->transid != root->last_trans);
193
194 level = btrfs_header_level(buf);
195 if (level == 0)
196 btrfs_item_key(buf, &disk_key, 0);
197 else
198 btrfs_node_key(buf, &disk_key, 0);
199
200 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
201 &disk_key, level, buf->start, 0,
202 BTRFS_NESTING_NEW_ROOT);
203 if (IS_ERR(cow))
204 return PTR_ERR(cow);
205
206 copy_extent_buffer_full(cow, buf);
207 btrfs_set_header_bytenr(cow, cow->start);
208 btrfs_set_header_generation(cow, trans->transid);
209 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
210 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
211 BTRFS_HEADER_FLAG_RELOC);
212 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
213 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
214 else
215 btrfs_set_header_owner(cow, new_root_objectid);
216
217 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
218
219 WARN_ON(btrfs_header_generation(buf) > trans->transid);
220 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
221 ret = btrfs_inc_ref(trans, root, cow, 1);
222 else
223 ret = btrfs_inc_ref(trans, root, cow, 0);
224
225 if (ret)
226 return ret;
227
228 btrfs_mark_buffer_dirty(cow);
229 *cow_ret = cow;
230 return 0;
231}
232
233enum mod_log_op {
234 MOD_LOG_KEY_REPLACE,
235 MOD_LOG_KEY_ADD,
236 MOD_LOG_KEY_REMOVE,
237 MOD_LOG_KEY_REMOVE_WHILE_FREEING,
238 MOD_LOG_KEY_REMOVE_WHILE_MOVING,
239 MOD_LOG_MOVE_KEYS,
240 MOD_LOG_ROOT_REPLACE,
241};
242
243struct tree_mod_root {
244 u64 logical;
245 u8 level;
246};
247
248struct tree_mod_elem {
249 struct rb_node node;
250 u64 logical;
251 u64 seq;
252 enum mod_log_op op;
253
254
255 int slot;
256
257
258 u64 generation;
259
260
261 struct btrfs_disk_key key;
262 u64 blockptr;
263
264
265 struct {
266 int dst_slot;
267 int nr_items;
268 } move;
269
270
271 struct tree_mod_root old_root;
272};
273
274
275
276
277static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
278{
279 return atomic64_inc_return(&fs_info->tree_mod_seq);
280}
281
282
283
284
285
286
287
288
289
290u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
291 struct seq_list *elem)
292{
293 write_lock(&fs_info->tree_mod_log_lock);
294 if (!elem->seq) {
295 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
296 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
297 }
298 write_unlock(&fs_info->tree_mod_log_lock);
299
300 return elem->seq;
301}
302
303void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
304 struct seq_list *elem)
305{
306 struct rb_root *tm_root;
307 struct rb_node *node;
308 struct rb_node *next;
309 struct tree_mod_elem *tm;
310 u64 min_seq = (u64)-1;
311 u64 seq_putting = elem->seq;
312
313 if (!seq_putting)
314 return;
315
316 write_lock(&fs_info->tree_mod_log_lock);
317 list_del(&elem->list);
318 elem->seq = 0;
319
320 if (!list_empty(&fs_info->tree_mod_seq_list)) {
321 struct seq_list *first;
322
323 first = list_first_entry(&fs_info->tree_mod_seq_list,
324 struct seq_list, list);
325 if (seq_putting > first->seq) {
326
327
328
329
330 write_unlock(&fs_info->tree_mod_log_lock);
331 return;
332 }
333 min_seq = first->seq;
334 }
335
336
337
338
339
340 tm_root = &fs_info->tree_mod_log;
341 for (node = rb_first(tm_root); node; node = next) {
342 next = rb_next(node);
343 tm = rb_entry(node, struct tree_mod_elem, node);
344 if (tm->seq >= min_seq)
345 continue;
346 rb_erase(node, tm_root);
347 kfree(tm);
348 }
349 write_unlock(&fs_info->tree_mod_log_lock);
350}
351
352
353
354
355
356
357
358
359
360static noinline int
361__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
362{
363 struct rb_root *tm_root;
364 struct rb_node **new;
365 struct rb_node *parent = NULL;
366 struct tree_mod_elem *cur;
367
368 lockdep_assert_held_write(&fs_info->tree_mod_log_lock);
369
370 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
371
372 tm_root = &fs_info->tree_mod_log;
373 new = &tm_root->rb_node;
374 while (*new) {
375 cur = rb_entry(*new, struct tree_mod_elem, node);
376 parent = *new;
377 if (cur->logical < tm->logical)
378 new = &((*new)->rb_left);
379 else if (cur->logical > tm->logical)
380 new = &((*new)->rb_right);
381 else if (cur->seq < tm->seq)
382 new = &((*new)->rb_left);
383 else if (cur->seq > tm->seq)
384 new = &((*new)->rb_right);
385 else
386 return -EEXIST;
387 }
388
389 rb_link_node(&tm->node, parent, new);
390 rb_insert_color(&tm->node, tm_root);
391 return 0;
392}
393
394
395
396
397
398
399
400static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
401 struct extent_buffer *eb) {
402 smp_mb();
403 if (list_empty(&(fs_info)->tree_mod_seq_list))
404 return 1;
405 if (eb && btrfs_header_level(eb) == 0)
406 return 1;
407
408 write_lock(&fs_info->tree_mod_log_lock);
409 if (list_empty(&(fs_info)->tree_mod_seq_list)) {
410 write_unlock(&fs_info->tree_mod_log_lock);
411 return 1;
412 }
413
414 return 0;
415}
416
417
418static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
419 struct extent_buffer *eb)
420{
421 smp_mb();
422 if (list_empty(&(fs_info)->tree_mod_seq_list))
423 return 0;
424 if (eb && btrfs_header_level(eb) == 0)
425 return 0;
426
427 return 1;
428}
429
430static struct tree_mod_elem *
431alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
432 enum mod_log_op op, gfp_t flags)
433{
434 struct tree_mod_elem *tm;
435
436 tm = kzalloc(sizeof(*tm), flags);
437 if (!tm)
438 return NULL;
439
440 tm->logical = eb->start;
441 if (op != MOD_LOG_KEY_ADD) {
442 btrfs_node_key(eb, &tm->key, slot);
443 tm->blockptr = btrfs_node_blockptr(eb, slot);
444 }
445 tm->op = op;
446 tm->slot = slot;
447 tm->generation = btrfs_node_ptr_generation(eb, slot);
448 RB_CLEAR_NODE(&tm->node);
449
450 return tm;
451}
452
453static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
454 enum mod_log_op op, gfp_t flags)
455{
456 struct tree_mod_elem *tm;
457 int ret;
458
459 if (!tree_mod_need_log(eb->fs_info, eb))
460 return 0;
461
462 tm = alloc_tree_mod_elem(eb, slot, op, flags);
463 if (!tm)
464 return -ENOMEM;
465
466 if (tree_mod_dont_log(eb->fs_info, eb)) {
467 kfree(tm);
468 return 0;
469 }
470
471 ret = __tree_mod_log_insert(eb->fs_info, tm);
472 write_unlock(&eb->fs_info->tree_mod_log_lock);
473 if (ret)
474 kfree(tm);
475
476 return ret;
477}
478
479static noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
480 int dst_slot, int src_slot, int nr_items)
481{
482 struct tree_mod_elem *tm = NULL;
483 struct tree_mod_elem **tm_list = NULL;
484 int ret = 0;
485 int i;
486 int locked = 0;
487
488 if (!tree_mod_need_log(eb->fs_info, eb))
489 return 0;
490
491 tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
492 if (!tm_list)
493 return -ENOMEM;
494
495 tm = kzalloc(sizeof(*tm), GFP_NOFS);
496 if (!tm) {
497 ret = -ENOMEM;
498 goto free_tms;
499 }
500
501 tm->logical = eb->start;
502 tm->slot = src_slot;
503 tm->move.dst_slot = dst_slot;
504 tm->move.nr_items = nr_items;
505 tm->op = MOD_LOG_MOVE_KEYS;
506
507 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
508 tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
509 MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
510 if (!tm_list[i]) {
511 ret = -ENOMEM;
512 goto free_tms;
513 }
514 }
515
516 if (tree_mod_dont_log(eb->fs_info, eb))
517 goto free_tms;
518 locked = 1;
519
520
521
522
523
524
525 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
526 ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
527 if (ret)
528 goto free_tms;
529 }
530
531 ret = __tree_mod_log_insert(eb->fs_info, tm);
532 if (ret)
533 goto free_tms;
534 write_unlock(&eb->fs_info->tree_mod_log_lock);
535 kfree(tm_list);
536
537 return 0;
538free_tms:
539 for (i = 0; i < nr_items; i++) {
540 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
541 rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
542 kfree(tm_list[i]);
543 }
544 if (locked)
545 write_unlock(&eb->fs_info->tree_mod_log_lock);
546 kfree(tm_list);
547 kfree(tm);
548
549 return ret;
550}
551
552static inline int
553__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
554 struct tree_mod_elem **tm_list,
555 int nritems)
556{
557 int i, j;
558 int ret;
559
560 for (i = nritems - 1; i >= 0; i--) {
561 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
562 if (ret) {
563 for (j = nritems - 1; j > i; j--)
564 rb_erase(&tm_list[j]->node,
565 &fs_info->tree_mod_log);
566 return ret;
567 }
568 }
569
570 return 0;
571}
572
573static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
574 struct extent_buffer *new_root, int log_removal)
575{
576 struct btrfs_fs_info *fs_info = old_root->fs_info;
577 struct tree_mod_elem *tm = NULL;
578 struct tree_mod_elem **tm_list = NULL;
579 int nritems = 0;
580 int ret = 0;
581 int i;
582
583 if (!tree_mod_need_log(fs_info, NULL))
584 return 0;
585
586 if (log_removal && btrfs_header_level(old_root) > 0) {
587 nritems = btrfs_header_nritems(old_root);
588 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
589 GFP_NOFS);
590 if (!tm_list) {
591 ret = -ENOMEM;
592 goto free_tms;
593 }
594 for (i = 0; i < nritems; i++) {
595 tm_list[i] = alloc_tree_mod_elem(old_root, i,
596 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
597 if (!tm_list[i]) {
598 ret = -ENOMEM;
599 goto free_tms;
600 }
601 }
602 }
603
604 tm = kzalloc(sizeof(*tm), GFP_NOFS);
605 if (!tm) {
606 ret = -ENOMEM;
607 goto free_tms;
608 }
609
610 tm->logical = new_root->start;
611 tm->old_root.logical = old_root->start;
612 tm->old_root.level = btrfs_header_level(old_root);
613 tm->generation = btrfs_header_generation(old_root);
614 tm->op = MOD_LOG_ROOT_REPLACE;
615
616 if (tree_mod_dont_log(fs_info, NULL))
617 goto free_tms;
618
619 if (tm_list)
620 ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
621 if (!ret)
622 ret = __tree_mod_log_insert(fs_info, tm);
623
624 write_unlock(&fs_info->tree_mod_log_lock);
625 if (ret)
626 goto free_tms;
627 kfree(tm_list);
628
629 return ret;
630
631free_tms:
632 if (tm_list) {
633 for (i = 0; i < nritems; i++)
634 kfree(tm_list[i]);
635 kfree(tm_list);
636 }
637 kfree(tm);
638
639 return ret;
640}
641
642static struct tree_mod_elem *
643__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
644 int smallest)
645{
646 struct rb_root *tm_root;
647 struct rb_node *node;
648 struct tree_mod_elem *cur = NULL;
649 struct tree_mod_elem *found = NULL;
650
651 read_lock(&fs_info->tree_mod_log_lock);
652 tm_root = &fs_info->tree_mod_log;
653 node = tm_root->rb_node;
654 while (node) {
655 cur = rb_entry(node, struct tree_mod_elem, node);
656 if (cur->logical < start) {
657 node = node->rb_left;
658 } else if (cur->logical > start) {
659 node = node->rb_right;
660 } else if (cur->seq < min_seq) {
661 node = node->rb_left;
662 } else if (!smallest) {
663
664 if (found)
665 BUG_ON(found->seq > cur->seq);
666 found = cur;
667 node = node->rb_left;
668 } else if (cur->seq > min_seq) {
669
670 if (found)
671 BUG_ON(found->seq < cur->seq);
672 found = cur;
673 node = node->rb_right;
674 } else {
675 found = cur;
676 break;
677 }
678 }
679 read_unlock(&fs_info->tree_mod_log_lock);
680
681 return found;
682}
683
684
685
686
687
688
689static struct tree_mod_elem *
690tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
691 u64 min_seq)
692{
693 return __tree_mod_log_search(fs_info, start, min_seq, 1);
694}
695
696
697
698
699
700
701static struct tree_mod_elem *
702tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
703{
704 return __tree_mod_log_search(fs_info, start, min_seq, 0);
705}
706
707static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
708 struct extent_buffer *src, unsigned long dst_offset,
709 unsigned long src_offset, int nr_items)
710{
711 struct btrfs_fs_info *fs_info = dst->fs_info;
712 int ret = 0;
713 struct tree_mod_elem **tm_list = NULL;
714 struct tree_mod_elem **tm_list_add, **tm_list_rem;
715 int i;
716 int locked = 0;
717
718 if (!tree_mod_need_log(fs_info, NULL))
719 return 0;
720
721 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
722 return 0;
723
724 tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
725 GFP_NOFS);
726 if (!tm_list)
727 return -ENOMEM;
728
729 tm_list_add = tm_list;
730 tm_list_rem = tm_list + nr_items;
731 for (i = 0; i < nr_items; i++) {
732 tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
733 MOD_LOG_KEY_REMOVE, GFP_NOFS);
734 if (!tm_list_rem[i]) {
735 ret = -ENOMEM;
736 goto free_tms;
737 }
738
739 tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
740 MOD_LOG_KEY_ADD, GFP_NOFS);
741 if (!tm_list_add[i]) {
742 ret = -ENOMEM;
743 goto free_tms;
744 }
745 }
746
747 if (tree_mod_dont_log(fs_info, NULL))
748 goto free_tms;
749 locked = 1;
750
751 for (i = 0; i < nr_items; i++) {
752 ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
753 if (ret)
754 goto free_tms;
755 ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
756 if (ret)
757 goto free_tms;
758 }
759
760 write_unlock(&fs_info->tree_mod_log_lock);
761 kfree(tm_list);
762
763 return 0;
764
765free_tms:
766 for (i = 0; i < nr_items * 2; i++) {
767 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
768 rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
769 kfree(tm_list[i]);
770 }
771 if (locked)
772 write_unlock(&fs_info->tree_mod_log_lock);
773 kfree(tm_list);
774
775 return ret;
776}
777
778static noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
779{
780 struct tree_mod_elem **tm_list = NULL;
781 int nritems = 0;
782 int i;
783 int ret = 0;
784
785 if (btrfs_header_level(eb) == 0)
786 return 0;
787
788 if (!tree_mod_need_log(eb->fs_info, NULL))
789 return 0;
790
791 nritems = btrfs_header_nritems(eb);
792 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
793 if (!tm_list)
794 return -ENOMEM;
795
796 for (i = 0; i < nritems; i++) {
797 tm_list[i] = alloc_tree_mod_elem(eb, i,
798 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
799 if (!tm_list[i]) {
800 ret = -ENOMEM;
801 goto free_tms;
802 }
803 }
804
805 if (tree_mod_dont_log(eb->fs_info, eb))
806 goto free_tms;
807
808 ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
809 write_unlock(&eb->fs_info->tree_mod_log_lock);
810 if (ret)
811 goto free_tms;
812 kfree(tm_list);
813
814 return 0;
815
816free_tms:
817 for (i = 0; i < nritems; i++)
818 kfree(tm_list[i]);
819 kfree(tm_list);
820
821 return ret;
822}
823
824
825
826
827int btrfs_block_can_be_shared(struct btrfs_root *root,
828 struct extent_buffer *buf)
829{
830
831
832
833
834
835 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
836 buf != root->node && buf != root->commit_root &&
837 (btrfs_header_generation(buf) <=
838 btrfs_root_last_snapshot(&root->root_item) ||
839 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
840 return 1;
841
842 return 0;
843}
844
845static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
846 struct btrfs_root *root,
847 struct extent_buffer *buf,
848 struct extent_buffer *cow,
849 int *last_ref)
850{
851 struct btrfs_fs_info *fs_info = root->fs_info;
852 u64 refs;
853 u64 owner;
854 u64 flags;
855 u64 new_flags = 0;
856 int ret;
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875 if (btrfs_block_can_be_shared(root, buf)) {
876 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
877 btrfs_header_level(buf), 1,
878 &refs, &flags);
879 if (ret)
880 return ret;
881 if (refs == 0) {
882 ret = -EROFS;
883 btrfs_handle_fs_error(fs_info, ret, NULL);
884 return ret;
885 }
886 } else {
887 refs = 1;
888 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
889 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
890 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
891 else
892 flags = 0;
893 }
894
895 owner = btrfs_header_owner(buf);
896 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
897 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
898
899 if (refs > 1) {
900 if ((owner == root->root_key.objectid ||
901 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
902 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
903 ret = btrfs_inc_ref(trans, root, buf, 1);
904 if (ret)
905 return ret;
906
907 if (root->root_key.objectid ==
908 BTRFS_TREE_RELOC_OBJECTID) {
909 ret = btrfs_dec_ref(trans, root, buf, 0);
910 if (ret)
911 return ret;
912 ret = btrfs_inc_ref(trans, root, cow, 1);
913 if (ret)
914 return ret;
915 }
916 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
917 } else {
918
919 if (root->root_key.objectid ==
920 BTRFS_TREE_RELOC_OBJECTID)
921 ret = btrfs_inc_ref(trans, root, cow, 1);
922 else
923 ret = btrfs_inc_ref(trans, root, cow, 0);
924 if (ret)
925 return ret;
926 }
927 if (new_flags != 0) {
928 int level = btrfs_header_level(buf);
929
930 ret = btrfs_set_disk_extent_flags(trans, buf,
931 new_flags, level, 0);
932 if (ret)
933 return ret;
934 }
935 } else {
936 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
937 if (root->root_key.objectid ==
938 BTRFS_TREE_RELOC_OBJECTID)
939 ret = btrfs_inc_ref(trans, root, cow, 1);
940 else
941 ret = btrfs_inc_ref(trans, root, cow, 0);
942 if (ret)
943 return ret;
944 ret = btrfs_dec_ref(trans, root, buf, 1);
945 if (ret)
946 return ret;
947 }
948 btrfs_clean_tree_block(buf);
949 *last_ref = 1;
950 }
951 return 0;
952}
953
954static struct extent_buffer *alloc_tree_block_no_bg_flush(
955 struct btrfs_trans_handle *trans,
956 struct btrfs_root *root,
957 u64 parent_start,
958 const struct btrfs_disk_key *disk_key,
959 int level,
960 u64 hint,
961 u64 empty_size,
962 enum btrfs_lock_nesting nest)
963{
964 struct btrfs_fs_info *fs_info = root->fs_info;
965 struct extent_buffer *ret;
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983 if (root == fs_info->extent_root ||
984 root == fs_info->chunk_root ||
985 root == fs_info->dev_root ||
986 root == fs_info->free_space_root)
987 trans->can_flush_pending_bgs = false;
988
989 ret = btrfs_alloc_tree_block(trans, root, parent_start,
990 root->root_key.objectid, disk_key, level,
991 hint, empty_size, nest);
992 trans->can_flush_pending_bgs = true;
993
994 return ret;
995}
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1010 struct btrfs_root *root,
1011 struct extent_buffer *buf,
1012 struct extent_buffer *parent, int parent_slot,
1013 struct extent_buffer **cow_ret,
1014 u64 search_start, u64 empty_size,
1015 enum btrfs_lock_nesting nest)
1016{
1017 struct btrfs_fs_info *fs_info = root->fs_info;
1018 struct btrfs_disk_key disk_key;
1019 struct extent_buffer *cow;
1020 int level, ret;
1021 int last_ref = 0;
1022 int unlock_orig = 0;
1023 u64 parent_start = 0;
1024
1025 if (*cow_ret == buf)
1026 unlock_orig = 1;
1027
1028 btrfs_assert_tree_locked(buf);
1029
1030 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
1031 trans->transid != fs_info->running_transaction->transid);
1032 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
1033 trans->transid != root->last_trans);
1034
1035 level = btrfs_header_level(buf);
1036
1037 if (level == 0)
1038 btrfs_item_key(buf, &disk_key, 0);
1039 else
1040 btrfs_node_key(buf, &disk_key, 0);
1041
1042 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
1043 parent_start = parent->start;
1044
1045 cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
1046 level, search_start, empty_size, nest);
1047 if (IS_ERR(cow))
1048 return PTR_ERR(cow);
1049
1050
1051
1052 copy_extent_buffer_full(cow, buf);
1053 btrfs_set_header_bytenr(cow, cow->start);
1054 btrfs_set_header_generation(cow, trans->transid);
1055 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
1056 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
1057 BTRFS_HEADER_FLAG_RELOC);
1058 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1059 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
1060 else
1061 btrfs_set_header_owner(cow, root->root_key.objectid);
1062
1063 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
1064
1065 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
1066 if (ret) {
1067 btrfs_tree_unlock(cow);
1068 free_extent_buffer(cow);
1069 btrfs_abort_transaction(trans, ret);
1070 return ret;
1071 }
1072
1073 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
1074 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
1075 if (ret) {
1076 btrfs_tree_unlock(cow);
1077 free_extent_buffer(cow);
1078 btrfs_abort_transaction(trans, ret);
1079 return ret;
1080 }
1081 }
1082
1083 if (buf == root->node) {
1084 WARN_ON(parent && parent != buf);
1085 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
1086 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
1087 parent_start = buf->start;
1088
1089 atomic_inc(&cow->refs);
1090 ret = tree_mod_log_insert_root(root->node, cow, 1);
1091 BUG_ON(ret < 0);
1092 rcu_assign_pointer(root->node, cow);
1093
1094 btrfs_free_tree_block(trans, root, buf, parent_start,
1095 last_ref);
1096 free_extent_buffer(buf);
1097 add_root_to_dirty_list(root);
1098 } else {
1099 WARN_ON(trans->transid != btrfs_header_generation(parent));
1100 tree_mod_log_insert_key(parent, parent_slot,
1101 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1102 btrfs_set_node_blockptr(parent, parent_slot,
1103 cow->start);
1104 btrfs_set_node_ptr_generation(parent, parent_slot,
1105 trans->transid);
1106 btrfs_mark_buffer_dirty(parent);
1107 if (last_ref) {
1108 ret = tree_mod_log_free_eb(buf);
1109 if (ret) {
1110 btrfs_tree_unlock(cow);
1111 free_extent_buffer(cow);
1112 btrfs_abort_transaction(trans, ret);
1113 return ret;
1114 }
1115 }
1116 btrfs_free_tree_block(trans, root, buf, parent_start,
1117 last_ref);
1118 }
1119 if (unlock_orig)
1120 btrfs_tree_unlock(buf);
1121 free_extent_buffer_stale(buf);
1122 btrfs_mark_buffer_dirty(cow);
1123 *cow_ret = cow;
1124 return 0;
1125}
1126
1127
1128
1129
1130
1131static struct tree_mod_elem *__tree_mod_log_oldest_root(
1132 struct extent_buffer *eb_root, u64 time_seq)
1133{
1134 struct tree_mod_elem *tm;
1135 struct tree_mod_elem *found = NULL;
1136 u64 root_logical = eb_root->start;
1137 int looped = 0;
1138
1139 if (!time_seq)
1140 return NULL;
1141
1142
1143
1144
1145
1146
1147
1148 while (1) {
1149 tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
1150 time_seq);
1151 if (!looped && !tm)
1152 return NULL;
1153
1154
1155
1156
1157
1158 if (!tm)
1159 break;
1160
1161
1162
1163
1164
1165
1166 if (tm->op != MOD_LOG_ROOT_REPLACE)
1167 break;
1168
1169 found = tm;
1170 root_logical = tm->old_root.logical;
1171 looped = 1;
1172 }
1173
1174
1175 if (!found)
1176 found = tm;
1177
1178 return found;
1179}
1180
1181
1182
1183
1184
1185
1186static void
1187__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1188 u64 time_seq, struct tree_mod_elem *first_tm)
1189{
1190 u32 n;
1191 struct rb_node *next;
1192 struct tree_mod_elem *tm = first_tm;
1193 unsigned long o_dst;
1194 unsigned long o_src;
1195 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1196
1197 n = btrfs_header_nritems(eb);
1198 read_lock(&fs_info->tree_mod_log_lock);
1199 while (tm && tm->seq >= time_seq) {
1200
1201
1202
1203
1204
1205 switch (tm->op) {
1206 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1207 BUG_ON(tm->slot < n);
1208 fallthrough;
1209 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
1210 case MOD_LOG_KEY_REMOVE:
1211 btrfs_set_node_key(eb, &tm->key, tm->slot);
1212 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1213 btrfs_set_node_ptr_generation(eb, tm->slot,
1214 tm->generation);
1215 n++;
1216 break;
1217 case MOD_LOG_KEY_REPLACE:
1218 BUG_ON(tm->slot >= n);
1219 btrfs_set_node_key(eb, &tm->key, tm->slot);
1220 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1221 btrfs_set_node_ptr_generation(eb, tm->slot,
1222 tm->generation);
1223 break;
1224 case MOD_LOG_KEY_ADD:
1225
1226 n--;
1227 break;
1228 case MOD_LOG_MOVE_KEYS:
1229 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1230 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1231 memmove_extent_buffer(eb, o_dst, o_src,
1232 tm->move.nr_items * p_size);
1233 break;
1234 case MOD_LOG_ROOT_REPLACE:
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244 break;
1245 }
1246 next = rb_next(&tm->node);
1247 if (!next)
1248 break;
1249 tm = rb_entry(next, struct tree_mod_elem, node);
1250 if (tm->logical != first_tm->logical)
1251 break;
1252 }
1253 read_unlock(&fs_info->tree_mod_log_lock);
1254 btrfs_set_header_nritems(eb, n);
1255}
1256
1257
1258
1259
1260
1261
1262
1263
1264static struct extent_buffer *
1265tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1266 struct extent_buffer *eb, u64 time_seq)
1267{
1268 struct extent_buffer *eb_rewin;
1269 struct tree_mod_elem *tm;
1270
1271 if (!time_seq)
1272 return eb;
1273
1274 if (btrfs_header_level(eb) == 0)
1275 return eb;
1276
1277 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1278 if (!tm)
1279 return eb;
1280
1281 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1282 BUG_ON(tm->slot != 0);
1283 eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
1284 if (!eb_rewin) {
1285 btrfs_tree_read_unlock(eb);
1286 free_extent_buffer(eb);
1287 return NULL;
1288 }
1289 btrfs_set_header_bytenr(eb_rewin, eb->start);
1290 btrfs_set_header_backref_rev(eb_rewin,
1291 btrfs_header_backref_rev(eb));
1292 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
1293 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
1294 } else {
1295 eb_rewin = btrfs_clone_extent_buffer(eb);
1296 if (!eb_rewin) {
1297 btrfs_tree_read_unlock(eb);
1298 free_extent_buffer(eb);
1299 return NULL;
1300 }
1301 }
1302
1303 btrfs_tree_read_unlock(eb);
1304 free_extent_buffer(eb);
1305
1306 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
1307 eb_rewin, btrfs_header_level(eb_rewin));
1308 btrfs_tree_read_lock(eb_rewin);
1309 __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
1310 WARN_ON(btrfs_header_nritems(eb_rewin) >
1311 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1312
1313 return eb_rewin;
1314}
1315
1316
1317
1318
1319
1320
1321
1322
1323static inline struct extent_buffer *
1324get_old_root(struct btrfs_root *root, u64 time_seq)
1325{
1326 struct btrfs_fs_info *fs_info = root->fs_info;
1327 struct tree_mod_elem *tm;
1328 struct extent_buffer *eb = NULL;
1329 struct extent_buffer *eb_root;
1330 u64 eb_root_owner = 0;
1331 struct extent_buffer *old;
1332 struct tree_mod_root *old_root = NULL;
1333 u64 old_generation = 0;
1334 u64 logical;
1335 int level;
1336
1337 eb_root = btrfs_read_lock_root_node(root);
1338 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
1339 if (!tm)
1340 return eb_root;
1341
1342 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1343 old_root = &tm->old_root;
1344 old_generation = tm->generation;
1345 logical = old_root->logical;
1346 level = old_root->level;
1347 } else {
1348 logical = eb_root->start;
1349 level = btrfs_header_level(eb_root);
1350 }
1351
1352 tm = tree_mod_log_search(fs_info, logical, time_seq);
1353 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1354 btrfs_tree_read_unlock(eb_root);
1355 free_extent_buffer(eb_root);
1356 old = read_tree_block(fs_info, logical, root->root_key.objectid,
1357 0, level, NULL);
1358 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
1359 if (!IS_ERR(old))
1360 free_extent_buffer(old);
1361 btrfs_warn(fs_info,
1362 "failed to read tree block %llu from get_old_root",
1363 logical);
1364 } else {
1365 eb = btrfs_clone_extent_buffer(old);
1366 free_extent_buffer(old);
1367 }
1368 } else if (old_root) {
1369 eb_root_owner = btrfs_header_owner(eb_root);
1370 btrfs_tree_read_unlock(eb_root);
1371 free_extent_buffer(eb_root);
1372 eb = alloc_dummy_extent_buffer(fs_info, logical);
1373 } else {
1374 eb = btrfs_clone_extent_buffer(eb_root);
1375 btrfs_tree_read_unlock(eb_root);
1376 free_extent_buffer(eb_root);
1377 }
1378
1379 if (!eb)
1380 return NULL;
1381 if (old_root) {
1382 btrfs_set_header_bytenr(eb, eb->start);
1383 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1384 btrfs_set_header_owner(eb, eb_root_owner);
1385 btrfs_set_header_level(eb, old_root->level);
1386 btrfs_set_header_generation(eb, old_generation);
1387 }
1388 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
1389 btrfs_header_level(eb));
1390 btrfs_tree_read_lock(eb);
1391 if (tm)
1392 __tree_mod_log_rewind(fs_info, eb, time_seq, tm);
1393 else
1394 WARN_ON(btrfs_header_level(eb) != 0);
1395 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1396
1397 return eb;
1398}
1399
1400int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1401{
1402 struct tree_mod_elem *tm;
1403 int level;
1404 struct extent_buffer *eb_root = btrfs_root_node(root);
1405
1406 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
1407 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1408 level = tm->old_root.level;
1409 } else {
1410 level = btrfs_header_level(eb_root);
1411 }
1412 free_extent_buffer(eb_root);
1413
1414 return level;
1415}
1416
1417static inline int should_cow_block(struct btrfs_trans_handle *trans,
1418 struct btrfs_root *root,
1419 struct extent_buffer *buf)
1420{
1421 if (btrfs_is_testing(root->fs_info))
1422 return 0;
1423
1424
1425 smp_mb__before_atomic();
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438 if (btrfs_header_generation(buf) == trans->transid &&
1439 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
1440 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
1441 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
1442 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
1443 return 0;
1444 return 1;
1445}
1446
1447
1448
1449
1450
1451
1452noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
1453 struct btrfs_root *root, struct extent_buffer *buf,
1454 struct extent_buffer *parent, int parent_slot,
1455 struct extent_buffer **cow_ret,
1456 enum btrfs_lock_nesting nest)
1457{
1458 struct btrfs_fs_info *fs_info = root->fs_info;
1459 u64 search_start;
1460 int ret;
1461
1462 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
1463 btrfs_err(fs_info,
1464 "COW'ing blocks on a fs root that's being dropped");
1465
1466 if (trans->transaction != fs_info->running_transaction)
1467 WARN(1, KERN_CRIT "trans %llu running %llu\n",
1468 trans->transid,
1469 fs_info->running_transaction->transid);
1470
1471 if (trans->transid != fs_info->generation)
1472 WARN(1, KERN_CRIT "trans %llu running %llu\n",
1473 trans->transid, fs_info->generation);
1474
1475 if (!should_cow_block(trans, root, buf)) {
1476 trans->dirty = true;
1477 *cow_ret = buf;
1478 return 0;
1479 }
1480
1481 search_start = buf->start & ~((u64)SZ_1G - 1);
1482
1483
1484
1485
1486
1487
1488
1489 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
1490 ret = __btrfs_cow_block(trans, root, buf, parent,
1491 parent_slot, cow_ret, search_start, 0, nest);
1492
1493 trace_btrfs_cow_block(root, buf, *cow_ret);
1494
1495 return ret;
1496}
1497
1498
1499
1500
1501
1502static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
1503{
1504 if (blocknr < other && other - (blocknr + blocksize) < 32768)
1505 return 1;
1506 if (blocknr > other && blocknr - (other + blocksize) < 32768)
1507 return 1;
1508 return 0;
1509}
1510
1511#ifdef __LITTLE_ENDIAN
1512
1513
1514
1515
1516
1517static int comp_keys(const struct btrfs_disk_key *disk_key,
1518 const struct btrfs_key *k2)
1519{
1520 const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
1521
1522 return btrfs_comp_cpu_keys(k1, k2);
1523}
1524
1525#else
1526
1527
1528
1529
1530static int comp_keys(const struct btrfs_disk_key *disk,
1531 const struct btrfs_key *k2)
1532{
1533 struct btrfs_key k1;
1534
1535 btrfs_disk_key_to_cpu(&k1, disk);
1536
1537 return btrfs_comp_cpu_keys(&k1, k2);
1538}
1539#endif
1540
1541
1542
1543
1544int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
1545{
1546 if (k1->objectid > k2->objectid)
1547 return 1;
1548 if (k1->objectid < k2->objectid)
1549 return -1;
1550 if (k1->type > k2->type)
1551 return 1;
1552 if (k1->type < k2->type)
1553 return -1;
1554 if (k1->offset > k2->offset)
1555 return 1;
1556 if (k1->offset < k2->offset)
1557 return -1;
1558 return 0;
1559}
1560
1561
1562
1563
1564
1565
1566int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1567 struct btrfs_root *root, struct extent_buffer *parent,
1568 int start_slot, u64 *last_ret,
1569 struct btrfs_key *progress)
1570{
1571 struct btrfs_fs_info *fs_info = root->fs_info;
1572 struct extent_buffer *cur;
1573 u64 blocknr;
1574 u64 search_start = *last_ret;
1575 u64 last_block = 0;
1576 u64 other;
1577 u32 parent_nritems;
1578 int end_slot;
1579 int i;
1580 int err = 0;
1581 u32 blocksize;
1582 int progress_passed = 0;
1583 struct btrfs_disk_key disk_key;
1584
1585 WARN_ON(trans->transaction != fs_info->running_transaction);
1586 WARN_ON(trans->transid != fs_info->generation);
1587
1588 parent_nritems = btrfs_header_nritems(parent);
1589 blocksize = fs_info->nodesize;
1590 end_slot = parent_nritems - 1;
1591
1592 if (parent_nritems <= 1)
1593 return 0;
1594
1595 for (i = start_slot; i <= end_slot; i++) {
1596 int close = 1;
1597
1598 btrfs_node_key(parent, &disk_key, i);
1599 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
1600 continue;
1601
1602 progress_passed = 1;
1603 blocknr = btrfs_node_blockptr(parent, i);
1604 if (last_block == 0)
1605 last_block = blocknr;
1606
1607 if (i > 0) {
1608 other = btrfs_node_blockptr(parent, i - 1);
1609 close = close_blocks(blocknr, other, blocksize);
1610 }
1611 if (!close && i < end_slot) {
1612 other = btrfs_node_blockptr(parent, i + 1);
1613 close = close_blocks(blocknr, other, blocksize);
1614 }
1615 if (close) {
1616 last_block = blocknr;
1617 continue;
1618 }
1619
1620 cur = btrfs_read_node_slot(parent, i);
1621 if (IS_ERR(cur))
1622 return PTR_ERR(cur);
1623 if (search_start == 0)
1624 search_start = last_block;
1625
1626 btrfs_tree_lock(cur);
1627 err = __btrfs_cow_block(trans, root, cur, parent, i,
1628 &cur, search_start,
1629 min(16 * blocksize,
1630 (end_slot - i) * blocksize),
1631 BTRFS_NESTING_COW);
1632 if (err) {
1633 btrfs_tree_unlock(cur);
1634 free_extent_buffer(cur);
1635 break;
1636 }
1637 search_start = cur->start;
1638 last_block = cur->start;
1639 *last_ret = search_start;
1640 btrfs_tree_unlock(cur);
1641 free_extent_buffer(cur);
1642 }
1643 return err;
1644}
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656static noinline int generic_bin_search(struct extent_buffer *eb,
1657 unsigned long p, int item_size,
1658 const struct btrfs_key *key,
1659 int max, int *slot)
1660{
1661 int low = 0;
1662 int high = max;
1663 int ret;
1664 const int key_size = sizeof(struct btrfs_disk_key);
1665
1666 if (low > high) {
1667 btrfs_err(eb->fs_info,
1668 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
1669 __func__, low, high, eb->start,
1670 btrfs_header_owner(eb), btrfs_header_level(eb));
1671 return -EINVAL;
1672 }
1673
1674 while (low < high) {
1675 unsigned long oip;
1676 unsigned long offset;
1677 struct btrfs_disk_key *tmp;
1678 struct btrfs_disk_key unaligned;
1679 int mid;
1680
1681 mid = (low + high) / 2;
1682 offset = p + mid * item_size;
1683 oip = offset_in_page(offset);
1684
1685 if (oip + key_size <= PAGE_SIZE) {
1686 const unsigned long idx = get_eb_page_index(offset);
1687 char *kaddr = page_address(eb->pages[idx]);
1688
1689 oip = get_eb_offset_in_page(eb, offset);
1690 tmp = (struct btrfs_disk_key *)(kaddr + oip);
1691 } else {
1692 read_extent_buffer(eb, &unaligned, offset, key_size);
1693 tmp = &unaligned;
1694 }
1695
1696 ret = comp_keys(tmp, key);
1697
1698 if (ret < 0)
1699 low = mid + 1;
1700 else if (ret > 0)
1701 high = mid;
1702 else {
1703 *slot = mid;
1704 return 0;
1705 }
1706 }
1707 *slot = low;
1708 return 1;
1709}
1710
1711
1712
1713
1714
1715int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
1716 int *slot)
1717{
1718 if (btrfs_header_level(eb) == 0)
1719 return generic_bin_search(eb,
1720 offsetof(struct btrfs_leaf, items),
1721 sizeof(struct btrfs_item),
1722 key, btrfs_header_nritems(eb),
1723 slot);
1724 else
1725 return generic_bin_search(eb,
1726 offsetof(struct btrfs_node, ptrs),
1727 sizeof(struct btrfs_key_ptr),
1728 key, btrfs_header_nritems(eb),
1729 slot);
1730}
1731
1732static void root_add_used(struct btrfs_root *root, u32 size)
1733{
1734 spin_lock(&root->accounting_lock);
1735 btrfs_set_root_used(&root->root_item,
1736 btrfs_root_used(&root->root_item) + size);
1737 spin_unlock(&root->accounting_lock);
1738}
1739
1740static void root_sub_used(struct btrfs_root *root, u32 size)
1741{
1742 spin_lock(&root->accounting_lock);
1743 btrfs_set_root_used(&root->root_item,
1744 btrfs_root_used(&root->root_item) - size);
1745 spin_unlock(&root->accounting_lock);
1746}
1747
1748
1749
1750
1751struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
1752 int slot)
1753{
1754 int level = btrfs_header_level(parent);
1755 struct extent_buffer *eb;
1756 struct btrfs_key first_key;
1757
1758 if (slot < 0 || slot >= btrfs_header_nritems(parent))
1759 return ERR_PTR(-ENOENT);
1760
1761 BUG_ON(level == 0);
1762
1763 btrfs_node_key_to_cpu(parent, &first_key, slot);
1764 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
1765 btrfs_header_owner(parent),
1766 btrfs_node_ptr_generation(parent, slot),
1767 level - 1, &first_key);
1768 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
1769 free_extent_buffer(eb);
1770 eb = ERR_PTR(-EIO);
1771 }
1772
1773 return eb;
1774}
1775
1776
1777
1778
1779
1780
1781static noinline int balance_level(struct btrfs_trans_handle *trans,
1782 struct btrfs_root *root,
1783 struct btrfs_path *path, int level)
1784{
1785 struct btrfs_fs_info *fs_info = root->fs_info;
1786 struct extent_buffer *right = NULL;
1787 struct extent_buffer *mid;
1788 struct extent_buffer *left = NULL;
1789 struct extent_buffer *parent = NULL;
1790 int ret = 0;
1791 int wret;
1792 int pslot;
1793 int orig_slot = path->slots[level];
1794 u64 orig_ptr;
1795
1796 ASSERT(level > 0);
1797
1798 mid = path->nodes[level];
1799
1800 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
1801 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1802
1803 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1804
1805 if (level < BTRFS_MAX_LEVEL - 1) {
1806 parent = path->nodes[level + 1];
1807 pslot = path->slots[level + 1];
1808 }
1809
1810
1811
1812
1813
1814 if (!parent) {
1815 struct extent_buffer *child;
1816
1817 if (btrfs_header_nritems(mid) != 1)
1818 return 0;
1819
1820
1821 child = btrfs_read_node_slot(mid, 0);
1822 if (IS_ERR(child)) {
1823 ret = PTR_ERR(child);
1824 btrfs_handle_fs_error(fs_info, ret, NULL);
1825 goto enospc;
1826 }
1827
1828 btrfs_tree_lock(child);
1829 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
1830 BTRFS_NESTING_COW);
1831 if (ret) {
1832 btrfs_tree_unlock(child);
1833 free_extent_buffer(child);
1834 goto enospc;
1835 }
1836
1837 ret = tree_mod_log_insert_root(root->node, child, 1);
1838 BUG_ON(ret < 0);
1839 rcu_assign_pointer(root->node, child);
1840
1841 add_root_to_dirty_list(root);
1842 btrfs_tree_unlock(child);
1843
1844 path->locks[level] = 0;
1845 path->nodes[level] = NULL;
1846 btrfs_clean_tree_block(mid);
1847 btrfs_tree_unlock(mid);
1848
1849 free_extent_buffer(mid);
1850
1851 root_sub_used(root, mid->len);
1852 btrfs_free_tree_block(trans, root, mid, 0, 1);
1853
1854 free_extent_buffer_stale(mid);
1855 return 0;
1856 }
1857 if (btrfs_header_nritems(mid) >
1858 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
1859 return 0;
1860
1861 left = btrfs_read_node_slot(parent, pslot - 1);
1862 if (IS_ERR(left))
1863 left = NULL;
1864
1865 if (left) {
1866 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
1867 wret = btrfs_cow_block(trans, root, left,
1868 parent, pslot - 1, &left,
1869 BTRFS_NESTING_LEFT_COW);
1870 if (wret) {
1871 ret = wret;
1872 goto enospc;
1873 }
1874 }
1875
1876 right = btrfs_read_node_slot(parent, pslot + 1);
1877 if (IS_ERR(right))
1878 right = NULL;
1879
1880 if (right) {
1881 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
1882 wret = btrfs_cow_block(trans, root, right,
1883 parent, pslot + 1, &right,
1884 BTRFS_NESTING_RIGHT_COW);
1885 if (wret) {
1886 ret = wret;
1887 goto enospc;
1888 }
1889 }
1890
1891
1892 if (left) {
1893 orig_slot += btrfs_header_nritems(left);
1894 wret = push_node_left(trans, left, mid, 1);
1895 if (wret < 0)
1896 ret = wret;
1897 }
1898
1899
1900
1901
1902 if (right) {
1903 wret = push_node_left(trans, mid, right, 1);
1904 if (wret < 0 && wret != -ENOSPC)
1905 ret = wret;
1906 if (btrfs_header_nritems(right) == 0) {
1907 btrfs_clean_tree_block(right);
1908 btrfs_tree_unlock(right);
1909 del_ptr(root, path, level + 1, pslot + 1);
1910 root_sub_used(root, right->len);
1911 btrfs_free_tree_block(trans, root, right, 0, 1);
1912 free_extent_buffer_stale(right);
1913 right = NULL;
1914 } else {
1915 struct btrfs_disk_key right_key;
1916 btrfs_node_key(right, &right_key, 0);
1917 ret = tree_mod_log_insert_key(parent, pslot + 1,
1918 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1919 BUG_ON(ret < 0);
1920 btrfs_set_node_key(parent, &right_key, pslot + 1);
1921 btrfs_mark_buffer_dirty(parent);
1922 }
1923 }
1924 if (btrfs_header_nritems(mid) == 1) {
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934 if (!left) {
1935 ret = -EROFS;
1936 btrfs_handle_fs_error(fs_info, ret, NULL);
1937 goto enospc;
1938 }
1939 wret = balance_node_right(trans, mid, left);
1940 if (wret < 0) {
1941 ret = wret;
1942 goto enospc;
1943 }
1944 if (wret == 1) {
1945 wret = push_node_left(trans, left, mid, 1);
1946 if (wret < 0)
1947 ret = wret;
1948 }
1949 BUG_ON(wret == 1);
1950 }
1951 if (btrfs_header_nritems(mid) == 0) {
1952 btrfs_clean_tree_block(mid);
1953 btrfs_tree_unlock(mid);
1954 del_ptr(root, path, level + 1, pslot);
1955 root_sub_used(root, mid->len);
1956 btrfs_free_tree_block(trans, root, mid, 0, 1);
1957 free_extent_buffer_stale(mid);
1958 mid = NULL;
1959 } else {
1960
1961 struct btrfs_disk_key mid_key;
1962 btrfs_node_key(mid, &mid_key, 0);
1963 ret = tree_mod_log_insert_key(parent, pslot,
1964 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1965 BUG_ON(ret < 0);
1966 btrfs_set_node_key(parent, &mid_key, pslot);
1967 btrfs_mark_buffer_dirty(parent);
1968 }
1969
1970
1971 if (left) {
1972 if (btrfs_header_nritems(left) > orig_slot) {
1973 atomic_inc(&left->refs);
1974
1975 path->nodes[level] = left;
1976 path->slots[level + 1] -= 1;
1977 path->slots[level] = orig_slot;
1978 if (mid) {
1979 btrfs_tree_unlock(mid);
1980 free_extent_buffer(mid);
1981 }
1982 } else {
1983 orig_slot -= btrfs_header_nritems(left);
1984 path->slots[level] = orig_slot;
1985 }
1986 }
1987
1988 if (orig_ptr !=
1989 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1990 BUG();
1991enospc:
1992 if (right) {
1993 btrfs_tree_unlock(right);
1994 free_extent_buffer(right);
1995 }
1996 if (left) {
1997 if (path->nodes[level] != left)
1998 btrfs_tree_unlock(left);
1999 free_extent_buffer(left);
2000 }
2001 return ret;
2002}
2003
2004
2005
2006
2007
2008static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
2009 struct btrfs_root *root,
2010 struct btrfs_path *path, int level)
2011{
2012 struct btrfs_fs_info *fs_info = root->fs_info;
2013 struct extent_buffer *right = NULL;
2014 struct extent_buffer *mid;
2015 struct extent_buffer *left = NULL;
2016 struct extent_buffer *parent = NULL;
2017 int ret = 0;
2018 int wret;
2019 int pslot;
2020 int orig_slot = path->slots[level];
2021
2022 if (level == 0)
2023 return 1;
2024
2025 mid = path->nodes[level];
2026 WARN_ON(btrfs_header_generation(mid) != trans->transid);
2027
2028 if (level < BTRFS_MAX_LEVEL - 1) {
2029 parent = path->nodes[level + 1];
2030 pslot = path->slots[level + 1];
2031 }
2032
2033 if (!parent)
2034 return 1;
2035
2036 left = btrfs_read_node_slot(parent, pslot - 1);
2037 if (IS_ERR(left))
2038 left = NULL;
2039
2040
2041 if (left) {
2042 u32 left_nr;
2043
2044 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
2045
2046 left_nr = btrfs_header_nritems(left);
2047 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
2048 wret = 1;
2049 } else {
2050 ret = btrfs_cow_block(trans, root, left, parent,
2051 pslot - 1, &left,
2052 BTRFS_NESTING_LEFT_COW);
2053 if (ret)
2054 wret = 1;
2055 else {
2056 wret = push_node_left(trans, left, mid, 0);
2057 }
2058 }
2059 if (wret < 0)
2060 ret = wret;
2061 if (wret == 0) {
2062 struct btrfs_disk_key disk_key;
2063 orig_slot += left_nr;
2064 btrfs_node_key(mid, &disk_key, 0);
2065 ret = tree_mod_log_insert_key(parent, pslot,
2066 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2067 BUG_ON(ret < 0);
2068 btrfs_set_node_key(parent, &disk_key, pslot);
2069 btrfs_mark_buffer_dirty(parent);
2070 if (btrfs_header_nritems(left) > orig_slot) {
2071 path->nodes[level] = left;
2072 path->slots[level + 1] -= 1;
2073 path->slots[level] = orig_slot;
2074 btrfs_tree_unlock(mid);
2075 free_extent_buffer(mid);
2076 } else {
2077 orig_slot -=
2078 btrfs_header_nritems(left);
2079 path->slots[level] = orig_slot;
2080 btrfs_tree_unlock(left);
2081 free_extent_buffer(left);
2082 }
2083 return 0;
2084 }
2085 btrfs_tree_unlock(left);
2086 free_extent_buffer(left);
2087 }
2088 right = btrfs_read_node_slot(parent, pslot + 1);
2089 if (IS_ERR(right))
2090 right = NULL;
2091
2092
2093
2094
2095 if (right) {
2096 u32 right_nr;
2097
2098 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
2099
2100 right_nr = btrfs_header_nritems(right);
2101 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
2102 wret = 1;
2103 } else {
2104 ret = btrfs_cow_block(trans, root, right,
2105 parent, pslot + 1,
2106 &right, BTRFS_NESTING_RIGHT_COW);
2107 if (ret)
2108 wret = 1;
2109 else {
2110 wret = balance_node_right(trans, right, mid);
2111 }
2112 }
2113 if (wret < 0)
2114 ret = wret;
2115 if (wret == 0) {
2116 struct btrfs_disk_key disk_key;
2117
2118 btrfs_node_key(right, &disk_key, 0);
2119 ret = tree_mod_log_insert_key(parent, pslot + 1,
2120 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2121 BUG_ON(ret < 0);
2122 btrfs_set_node_key(parent, &disk_key, pslot + 1);
2123 btrfs_mark_buffer_dirty(parent);
2124
2125 if (btrfs_header_nritems(mid) <= orig_slot) {
2126 path->nodes[level] = right;
2127 path->slots[level + 1] += 1;
2128 path->slots[level] = orig_slot -
2129 btrfs_header_nritems(mid);
2130 btrfs_tree_unlock(mid);
2131 free_extent_buffer(mid);
2132 } else {
2133 btrfs_tree_unlock(right);
2134 free_extent_buffer(right);
2135 }
2136 return 0;
2137 }
2138 btrfs_tree_unlock(right);
2139 free_extent_buffer(right);
2140 }
2141 return 1;
2142}
2143
2144
2145
2146
2147
2148static void reada_for_search(struct btrfs_fs_info *fs_info,
2149 struct btrfs_path *path,
2150 int level, int slot, u64 objectid)
2151{
2152 struct extent_buffer *node;
2153 struct btrfs_disk_key disk_key;
2154 u32 nritems;
2155 u64 search;
2156 u64 target;
2157 u64 nread = 0;
2158 struct extent_buffer *eb;
2159 u32 nr;
2160 u32 blocksize;
2161 u32 nscan = 0;
2162
2163 if (level != 1)
2164 return;
2165
2166 if (!path->nodes[level])
2167 return;
2168
2169 node = path->nodes[level];
2170
2171 search = btrfs_node_blockptr(node, slot);
2172 blocksize = fs_info->nodesize;
2173 eb = find_extent_buffer(fs_info, search);
2174 if (eb) {
2175 free_extent_buffer(eb);
2176 return;
2177 }
2178
2179 target = search;
2180
2181 nritems = btrfs_header_nritems(node);
2182 nr = slot;
2183
2184 while (1) {
2185 if (path->reada == READA_BACK) {
2186 if (nr == 0)
2187 break;
2188 nr--;
2189 } else if (path->reada == READA_FORWARD) {
2190 nr++;
2191 if (nr >= nritems)
2192 break;
2193 }
2194 if (path->reada == READA_BACK && objectid) {
2195 btrfs_node_key(node, &disk_key, nr);
2196 if (btrfs_disk_key_objectid(&disk_key) != objectid)
2197 break;
2198 }
2199 search = btrfs_node_blockptr(node, nr);
2200 if ((search <= target && target - search <= 65536) ||
2201 (search > target && search - target <= 65536)) {
2202 btrfs_readahead_node_child(node, nr);
2203 nread += blocksize;
2204 }
2205 nscan++;
2206 if ((nread > 65536 || nscan > 32))
2207 break;
2208 }
2209}
2210
2211static noinline void reada_for_balance(struct btrfs_path *path, int level)
2212{
2213 struct extent_buffer *parent;
2214 int slot;
2215 int nritems;
2216
2217 parent = path->nodes[level + 1];
2218 if (!parent)
2219 return;
2220
2221 nritems = btrfs_header_nritems(parent);
2222 slot = path->slots[level + 1];
2223
2224 if (slot > 0)
2225 btrfs_readahead_node_child(parent, slot - 1);
2226 if (slot + 1 < nritems)
2227 btrfs_readahead_node_child(parent, slot + 1);
2228}
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244static noinline void unlock_up(struct btrfs_path *path, int level,
2245 int lowest_unlock, int min_write_lock_level,
2246 int *write_lock_level)
2247{
2248 int i;
2249 int skip_level = level;
2250 int no_skips = 0;
2251 struct extent_buffer *t;
2252
2253 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2254 if (!path->nodes[i])
2255 break;
2256 if (!path->locks[i])
2257 break;
2258 if (!no_skips && path->slots[i] == 0) {
2259 skip_level = i + 1;
2260 continue;
2261 }
2262 if (!no_skips && path->keep_locks) {
2263 u32 nritems;
2264 t = path->nodes[i];
2265 nritems = btrfs_header_nritems(t);
2266 if (nritems < 1 || path->slots[i] >= nritems - 1) {
2267 skip_level = i + 1;
2268 continue;
2269 }
2270 }
2271 if (skip_level < i && i >= lowest_unlock)
2272 no_skips = 1;
2273
2274 t = path->nodes[i];
2275 if (i >= lowest_unlock && i > skip_level) {
2276 btrfs_tree_unlock_rw(t, path->locks[i]);
2277 path->locks[i] = 0;
2278 if (write_lock_level &&
2279 i > min_write_lock_level &&
2280 i <= *write_lock_level) {
2281 *write_lock_level = i - 1;
2282 }
2283 }
2284 }
2285}
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295static int
2296read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2297 struct extent_buffer **eb_ret, int level, int slot,
2298 const struct btrfs_key *key)
2299{
2300 struct btrfs_fs_info *fs_info = root->fs_info;
2301 u64 blocknr;
2302 u64 gen;
2303 struct extent_buffer *tmp;
2304 struct btrfs_key first_key;
2305 int ret;
2306 int parent_level;
2307
2308 blocknr = btrfs_node_blockptr(*eb_ret, slot);
2309 gen = btrfs_node_ptr_generation(*eb_ret, slot);
2310 parent_level = btrfs_header_level(*eb_ret);
2311 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
2312
2313 tmp = find_extent_buffer(fs_info, blocknr);
2314 if (tmp) {
2315
2316 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
2317
2318
2319
2320
2321
2322 if (btrfs_verify_level_key(tmp,
2323 parent_level - 1, &first_key, gen)) {
2324 free_extent_buffer(tmp);
2325 return -EUCLEAN;
2326 }
2327 *eb_ret = tmp;
2328 return 0;
2329 }
2330
2331
2332 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
2333 if (!ret) {
2334 *eb_ret = tmp;
2335 return 0;
2336 }
2337 free_extent_buffer(tmp);
2338 btrfs_release_path(p);
2339 return -EIO;
2340 }
2341
2342
2343
2344
2345
2346
2347
2348
2349 btrfs_unlock_up_safe(p, level + 1);
2350
2351 if (p->reada != READA_NONE)
2352 reada_for_search(fs_info, p, level, slot, key->objectid);
2353
2354 ret = -EAGAIN;
2355 tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
2356 gen, parent_level - 1, &first_key);
2357 if (!IS_ERR(tmp)) {
2358
2359
2360
2361
2362
2363
2364 if (!extent_buffer_uptodate(tmp))
2365 ret = -EIO;
2366 free_extent_buffer(tmp);
2367 } else {
2368 ret = PTR_ERR(tmp);
2369 }
2370
2371 btrfs_release_path(p);
2372 return ret;
2373}
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384static int
2385setup_nodes_for_search(struct btrfs_trans_handle *trans,
2386 struct btrfs_root *root, struct btrfs_path *p,
2387 struct extent_buffer *b, int level, int ins_len,
2388 int *write_lock_level)
2389{
2390 struct btrfs_fs_info *fs_info = root->fs_info;
2391 int ret = 0;
2392
2393 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
2394 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
2395
2396 if (*write_lock_level < level + 1) {
2397 *write_lock_level = level + 1;
2398 btrfs_release_path(p);
2399 return -EAGAIN;
2400 }
2401
2402 reada_for_balance(p, level);
2403 ret = split_node(trans, root, p, level);
2404
2405 b = p->nodes[level];
2406 } else if (ins_len < 0 && btrfs_header_nritems(b) <
2407 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
2408
2409 if (*write_lock_level < level + 1) {
2410 *write_lock_level = level + 1;
2411 btrfs_release_path(p);
2412 return -EAGAIN;
2413 }
2414
2415 reada_for_balance(p, level);
2416 ret = balance_level(trans, root, p, level);
2417 if (ret)
2418 return ret;
2419
2420 b = p->nodes[level];
2421 if (!b) {
2422 btrfs_release_path(p);
2423 return -EAGAIN;
2424 }
2425 BUG_ON(btrfs_header_nritems(b) == 1);
2426 }
2427 return ret;
2428}
2429
2430int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
2431 u64 iobjectid, u64 ioff, u8 key_type,
2432 struct btrfs_key *found_key)
2433{
2434 int ret;
2435 struct btrfs_key key;
2436 struct extent_buffer *eb;
2437
2438 ASSERT(path);
2439 ASSERT(found_key);
2440
2441 key.type = key_type;
2442 key.objectid = iobjectid;
2443 key.offset = ioff;
2444
2445 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
2446 if (ret < 0)
2447 return ret;
2448
2449 eb = path->nodes[0];
2450 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2451 ret = btrfs_next_leaf(fs_root, path);
2452 if (ret)
2453 return ret;
2454 eb = path->nodes[0];
2455 }
2456
2457 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2458 if (found_key->type != key.type ||
2459 found_key->objectid != key.objectid)
2460 return 1;
2461
2462 return 0;
2463}
2464
2465static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
2466 struct btrfs_path *p,
2467 int write_lock_level)
2468{
2469 struct btrfs_fs_info *fs_info = root->fs_info;
2470 struct extent_buffer *b;
2471 int root_lock;
2472 int level = 0;
2473
2474
2475 root_lock = BTRFS_READ_LOCK;
2476
2477 if (p->search_commit_root) {
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487 if (p->need_commit_sem) {
2488 down_read(&fs_info->commit_root_sem);
2489 b = btrfs_clone_extent_buffer(root->commit_root);
2490 up_read(&fs_info->commit_root_sem);
2491 if (!b)
2492 return ERR_PTR(-ENOMEM);
2493
2494 } else {
2495 b = root->commit_root;
2496 atomic_inc(&b->refs);
2497 }
2498 level = btrfs_header_level(b);
2499
2500
2501
2502
2503 ASSERT(p->skip_locking == 1);
2504
2505 goto out;
2506 }
2507
2508 if (p->skip_locking) {
2509 b = btrfs_root_node(root);
2510 level = btrfs_header_level(b);
2511 goto out;
2512 }
2513
2514
2515
2516
2517
2518 if (write_lock_level < BTRFS_MAX_LEVEL) {
2519
2520
2521
2522
2523 b = btrfs_read_lock_root_node(root);
2524 level = btrfs_header_level(b);
2525 if (level > write_lock_level)
2526 goto out;
2527
2528
2529 btrfs_tree_read_unlock(b);
2530 free_extent_buffer(b);
2531 }
2532
2533 b = btrfs_lock_root_node(root);
2534 root_lock = BTRFS_WRITE_LOCK;
2535
2536
2537 level = btrfs_header_level(b);
2538
2539out:
2540 p->nodes[level] = b;
2541 if (!p->skip_locking)
2542 p->locks[level] = root_lock;
2543
2544
2545
2546 return b;
2547}
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2582 const struct btrfs_key *key, struct btrfs_path *p,
2583 int ins_len, int cow)
2584{
2585 struct extent_buffer *b;
2586 int slot;
2587 int ret;
2588 int err;
2589 int level;
2590 int lowest_unlock = 1;
2591
2592 int write_lock_level = 0;
2593 u8 lowest_level = 0;
2594 int min_write_lock_level;
2595 int prev_cmp;
2596
2597 lowest_level = p->lowest_level;
2598 WARN_ON(lowest_level && ins_len > 0);
2599 WARN_ON(p->nodes[0] != NULL);
2600 BUG_ON(!cow && ins_len);
2601
2602 if (ins_len < 0) {
2603 lowest_unlock = 2;
2604
2605
2606
2607
2608
2609 write_lock_level = 2;
2610 } else if (ins_len > 0) {
2611
2612
2613
2614
2615 write_lock_level = 1;
2616 }
2617
2618 if (!cow)
2619 write_lock_level = -1;
2620
2621 if (cow && (p->keep_locks || p->lowest_level))
2622 write_lock_level = BTRFS_MAX_LEVEL;
2623
2624 min_write_lock_level = write_lock_level;
2625
2626again:
2627 prev_cmp = -1;
2628 b = btrfs_search_slot_get_root(root, p, write_lock_level);
2629 if (IS_ERR(b)) {
2630 ret = PTR_ERR(b);
2631 goto done;
2632 }
2633
2634 while (b) {
2635 int dec = 0;
2636
2637 level = btrfs_header_level(b);
2638
2639 if (cow) {
2640 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
2641
2642
2643
2644
2645
2646
2647 if (!should_cow_block(trans, root, b)) {
2648 trans->dirty = true;
2649 goto cow_done;
2650 }
2651
2652
2653
2654
2655
2656 if (level > write_lock_level ||
2657 (level + 1 > write_lock_level &&
2658 level + 1 < BTRFS_MAX_LEVEL &&
2659 p->nodes[level + 1])) {
2660 write_lock_level = level + 1;
2661 btrfs_release_path(p);
2662 goto again;
2663 }
2664
2665 if (last_level)
2666 err = btrfs_cow_block(trans, root, b, NULL, 0,
2667 &b,
2668 BTRFS_NESTING_COW);
2669 else
2670 err = btrfs_cow_block(trans, root, b,
2671 p->nodes[level + 1],
2672 p->slots[level + 1], &b,
2673 BTRFS_NESTING_COW);
2674 if (err) {
2675 ret = err;
2676 goto done;
2677 }
2678 }
2679cow_done:
2680 p->nodes[level] = b;
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697 if (!ins_len && !p->keep_locks) {
2698 int u = level + 1;
2699
2700 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2701 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2702 p->locks[u] = 0;
2703 }
2704 }
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714 if (prev_cmp == 0) {
2715 slot = 0;
2716 ret = 0;
2717 } else {
2718 ret = btrfs_bin_search(b, key, &slot);
2719 prev_cmp = ret;
2720 if (ret < 0)
2721 goto done;
2722 }
2723
2724 if (level == 0) {
2725 p->slots[level] = slot;
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736 if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
2737 ASSERT(ins_len >= sizeof(struct btrfs_item));
2738 ins_len -= sizeof(struct btrfs_item);
2739 }
2740 if (ins_len > 0 &&
2741 btrfs_leaf_free_space(b) < ins_len) {
2742 if (write_lock_level < 1) {
2743 write_lock_level = 1;
2744 btrfs_release_path(p);
2745 goto again;
2746 }
2747
2748 err = split_leaf(trans, root, key,
2749 p, ins_len, ret == 0);
2750
2751 BUG_ON(err > 0);
2752 if (err) {
2753 ret = err;
2754 goto done;
2755 }
2756 }
2757 if (!p->search_for_split)
2758 unlock_up(p, level, lowest_unlock,
2759 min_write_lock_level, NULL);
2760 goto done;
2761 }
2762 if (ret && slot > 0) {
2763 dec = 1;
2764 slot--;
2765 }
2766 p->slots[level] = slot;
2767 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
2768 &write_lock_level);
2769 if (err == -EAGAIN)
2770 goto again;
2771 if (err) {
2772 ret = err;
2773 goto done;
2774 }
2775 b = p->nodes[level];
2776 slot = p->slots[level];
2777
2778
2779
2780
2781
2782
2783 if (slot == 0 && ins_len && write_lock_level < level + 1) {
2784 write_lock_level = level + 1;
2785 btrfs_release_path(p);
2786 goto again;
2787 }
2788
2789 unlock_up(p, level, lowest_unlock, min_write_lock_level,
2790 &write_lock_level);
2791
2792 if (level == lowest_level) {
2793 if (dec)
2794 p->slots[level]++;
2795 goto done;
2796 }
2797
2798 err = read_block_for_search(root, p, &b, level, slot, key);
2799 if (err == -EAGAIN)
2800 goto again;
2801 if (err) {
2802 ret = err;
2803 goto done;
2804 }
2805
2806 if (!p->skip_locking) {
2807 level = btrfs_header_level(b);
2808 if (level <= write_lock_level) {
2809 btrfs_tree_lock(b);
2810 p->locks[level] = BTRFS_WRITE_LOCK;
2811 } else {
2812 btrfs_tree_read_lock(b);
2813 p->locks[level] = BTRFS_READ_LOCK;
2814 }
2815 p->nodes[level] = b;
2816 }
2817 }
2818 ret = 1;
2819done:
2820 if (ret < 0 && !p->skip_release_on_error)
2821 btrfs_release_path(p);
2822 return ret;
2823}
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
2837 struct btrfs_path *p, u64 time_seq)
2838{
2839 struct btrfs_fs_info *fs_info = root->fs_info;
2840 struct extent_buffer *b;
2841 int slot;
2842 int ret;
2843 int err;
2844 int level;
2845 int lowest_unlock = 1;
2846 u8 lowest_level = 0;
2847
2848 lowest_level = p->lowest_level;
2849 WARN_ON(p->nodes[0] != NULL);
2850
2851 if (p->search_commit_root) {
2852 BUG_ON(time_seq);
2853 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2854 }
2855
2856again:
2857 b = get_old_root(root, time_seq);
2858 if (!b) {
2859 ret = -EIO;
2860 goto done;
2861 }
2862 level = btrfs_header_level(b);
2863 p->locks[level] = BTRFS_READ_LOCK;
2864
2865 while (b) {
2866 int dec = 0;
2867
2868 level = btrfs_header_level(b);
2869 p->nodes[level] = b;
2870
2871
2872
2873
2874
2875
2876
2877 btrfs_unlock_up_safe(p, level + 1);
2878
2879 ret = btrfs_bin_search(b, key, &slot);
2880 if (ret < 0)
2881 goto done;
2882
2883 if (level == 0) {
2884 p->slots[level] = slot;
2885 unlock_up(p, level, lowest_unlock, 0, NULL);
2886 goto done;
2887 }
2888
2889 if (ret && slot > 0) {
2890 dec = 1;
2891 slot--;
2892 }
2893 p->slots[level] = slot;
2894 unlock_up(p, level, lowest_unlock, 0, NULL);
2895
2896 if (level == lowest_level) {
2897 if (dec)
2898 p->slots[level]++;
2899 goto done;
2900 }
2901
2902 err = read_block_for_search(root, p, &b, level, slot, key);
2903 if (err == -EAGAIN)
2904 goto again;
2905 if (err) {
2906 ret = err;
2907 goto done;
2908 }
2909
2910 level = btrfs_header_level(b);
2911 btrfs_tree_read_lock(b);
2912 b = tree_mod_log_rewind(fs_info, p, b, time_seq);
2913 if (!b) {
2914 ret = -ENOMEM;
2915 goto done;
2916 }
2917 p->locks[level] = BTRFS_READ_LOCK;
2918 p->nodes[level] = b;
2919 }
2920 ret = 1;
2921done:
2922 if (ret < 0)
2923 btrfs_release_path(p);
2924
2925 return ret;
2926}
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940int btrfs_search_slot_for_read(struct btrfs_root *root,
2941 const struct btrfs_key *key,
2942 struct btrfs_path *p, int find_higher,
2943 int return_any)
2944{
2945 int ret;
2946 struct extent_buffer *leaf;
2947
2948again:
2949 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
2950 if (ret <= 0)
2951 return ret;
2952
2953
2954
2955
2956
2957
2958
2959 leaf = p->nodes[0];
2960
2961 if (find_higher) {
2962 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
2963 ret = btrfs_next_leaf(root, p);
2964 if (ret <= 0)
2965 return ret;
2966 if (!return_any)
2967 return 1;
2968
2969
2970
2971
2972 return_any = 0;
2973 find_higher = 0;
2974 btrfs_release_path(p);
2975 goto again;
2976 }
2977 } else {
2978 if (p->slots[0] == 0) {
2979 ret = btrfs_prev_leaf(root, p);
2980 if (ret < 0)
2981 return ret;
2982 if (!ret) {
2983 leaf = p->nodes[0];
2984 if (p->slots[0] == btrfs_header_nritems(leaf))
2985 p->slots[0]--;
2986 return 0;
2987 }
2988 if (!return_any)
2989 return 1;
2990
2991
2992
2993
2994 return_any = 0;
2995 find_higher = 1;
2996 btrfs_release_path(p);
2997 goto again;
2998 } else {
2999 --p->slots[0];
3000 }
3001 }
3002 return 0;
3003}
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013static void fixup_low_keys(struct btrfs_path *path,
3014 struct btrfs_disk_key *key, int level)
3015{
3016 int i;
3017 struct extent_buffer *t;
3018 int ret;
3019
3020 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
3021 int tslot = path->slots[i];
3022
3023 if (!path->nodes[i])
3024 break;
3025 t = path->nodes[i];
3026 ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
3027 GFP_ATOMIC);
3028 BUG_ON(ret < 0);
3029 btrfs_set_node_key(t, key, tslot);
3030 btrfs_mark_buffer_dirty(path->nodes[i]);
3031 if (tslot != 0)
3032 break;
3033 }
3034}
3035
3036
3037
3038
3039
3040
3041
3042void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3043 struct btrfs_path *path,
3044 const struct btrfs_key *new_key)
3045{
3046 struct btrfs_disk_key disk_key;
3047 struct extent_buffer *eb;
3048 int slot;
3049
3050 eb = path->nodes[0];
3051 slot = path->slots[0];
3052 if (slot > 0) {
3053 btrfs_item_key(eb, &disk_key, slot - 1);
3054 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
3055 btrfs_crit(fs_info,
3056 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3057 slot, btrfs_disk_key_objectid(&disk_key),
3058 btrfs_disk_key_type(&disk_key),
3059 btrfs_disk_key_offset(&disk_key),
3060 new_key->objectid, new_key->type,
3061 new_key->offset);
3062 btrfs_print_leaf(eb);
3063 BUG();
3064 }
3065 }
3066 if (slot < btrfs_header_nritems(eb) - 1) {
3067 btrfs_item_key(eb, &disk_key, slot + 1);
3068 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
3069 btrfs_crit(fs_info,
3070 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3071 slot, btrfs_disk_key_objectid(&disk_key),
3072 btrfs_disk_key_type(&disk_key),
3073 btrfs_disk_key_offset(&disk_key),
3074 new_key->objectid, new_key->type,
3075 new_key->offset);
3076 btrfs_print_leaf(eb);
3077 BUG();
3078 }
3079 }
3080
3081 btrfs_cpu_key_to_disk(&disk_key, new_key);
3082 btrfs_set_item_key(eb, &disk_key, slot);
3083 btrfs_mark_buffer_dirty(eb);
3084 if (slot == 0)
3085 fixup_low_keys(path, &disk_key, 1);
3086}
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108static bool check_sibling_keys(struct extent_buffer *left,
3109 struct extent_buffer *right)
3110{
3111 struct btrfs_key left_last;
3112 struct btrfs_key right_first;
3113 int level = btrfs_header_level(left);
3114 int nr_left = btrfs_header_nritems(left);
3115 int nr_right = btrfs_header_nritems(right);
3116
3117
3118 if (!nr_left || !nr_right)
3119 return false;
3120
3121 if (level) {
3122 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
3123 btrfs_node_key_to_cpu(right, &right_first, 0);
3124 } else {
3125 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
3126 btrfs_item_key_to_cpu(right, &right_first, 0);
3127 }
3128
3129 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
3130 btrfs_crit(left->fs_info,
3131"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
3132 left_last.objectid, left_last.type,
3133 left_last.offset, right_first.objectid,
3134 right_first.type, right_first.offset);
3135 return true;
3136 }
3137 return false;
3138}
3139
3140
3141
3142
3143
3144
3145
3146
3147static int push_node_left(struct btrfs_trans_handle *trans,
3148 struct extent_buffer *dst,
3149 struct extent_buffer *src, int empty)
3150{
3151 struct btrfs_fs_info *fs_info = trans->fs_info;
3152 int push_items = 0;
3153 int src_nritems;
3154 int dst_nritems;
3155 int ret = 0;
3156
3157 src_nritems = btrfs_header_nritems(src);
3158 dst_nritems = btrfs_header_nritems(dst);
3159 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
3160 WARN_ON(btrfs_header_generation(src) != trans->transid);
3161 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3162
3163 if (!empty && src_nritems <= 8)
3164 return 1;
3165
3166 if (push_items <= 0)
3167 return 1;
3168
3169 if (empty) {
3170 push_items = min(src_nritems, push_items);
3171 if (push_items < src_nritems) {
3172
3173
3174
3175 if (src_nritems - push_items < 8) {
3176 if (push_items <= 8)
3177 return 1;
3178 push_items -= 8;
3179 }
3180 }
3181 } else
3182 push_items = min(src_nritems - 8, push_items);
3183
3184
3185 if (check_sibling_keys(dst, src)) {
3186 ret = -EUCLEAN;
3187 btrfs_abort_transaction(trans, ret);
3188 return ret;
3189 }
3190 ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
3191 if (ret) {
3192 btrfs_abort_transaction(trans, ret);
3193 return ret;
3194 }
3195 copy_extent_buffer(dst, src,
3196 btrfs_node_key_ptr_offset(dst_nritems),
3197 btrfs_node_key_ptr_offset(0),
3198 push_items * sizeof(struct btrfs_key_ptr));
3199
3200 if (push_items < src_nritems) {
3201
3202
3203
3204
3205 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
3206 btrfs_node_key_ptr_offset(push_items),
3207 (src_nritems - push_items) *
3208 sizeof(struct btrfs_key_ptr));
3209 }
3210 btrfs_set_header_nritems(src, src_nritems - push_items);
3211 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3212 btrfs_mark_buffer_dirty(src);
3213 btrfs_mark_buffer_dirty(dst);
3214
3215 return ret;
3216}
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227static int balance_node_right(struct btrfs_trans_handle *trans,
3228 struct extent_buffer *dst,
3229 struct extent_buffer *src)
3230{
3231 struct btrfs_fs_info *fs_info = trans->fs_info;
3232 int push_items = 0;
3233 int max_push;
3234 int src_nritems;
3235 int dst_nritems;
3236 int ret = 0;
3237
3238 WARN_ON(btrfs_header_generation(src) != trans->transid);
3239 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3240
3241 src_nritems = btrfs_header_nritems(src);
3242 dst_nritems = btrfs_header_nritems(dst);
3243 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
3244 if (push_items <= 0)
3245 return 1;
3246
3247 if (src_nritems < 4)
3248 return 1;
3249
3250 max_push = src_nritems / 2 + 1;
3251
3252 if (max_push >= src_nritems)
3253 return 1;
3254
3255 if (max_push < push_items)
3256 push_items = max_push;
3257
3258
3259 if (check_sibling_keys(src, dst)) {
3260 ret = -EUCLEAN;
3261 btrfs_abort_transaction(trans, ret);
3262 return ret;
3263 }
3264 ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
3265 BUG_ON(ret < 0);
3266 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
3267 btrfs_node_key_ptr_offset(0),
3268 (dst_nritems) *
3269 sizeof(struct btrfs_key_ptr));
3270
3271 ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
3272 push_items);
3273 if (ret) {
3274 btrfs_abort_transaction(trans, ret);
3275 return ret;
3276 }
3277 copy_extent_buffer(dst, src,
3278 btrfs_node_key_ptr_offset(0),
3279 btrfs_node_key_ptr_offset(src_nritems - push_items),
3280 push_items * sizeof(struct btrfs_key_ptr));
3281
3282 btrfs_set_header_nritems(src, src_nritems - push_items);
3283 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3284
3285 btrfs_mark_buffer_dirty(src);
3286 btrfs_mark_buffer_dirty(dst);
3287
3288 return ret;
3289}
3290
3291
3292
3293
3294
3295
3296
3297
3298static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3299 struct btrfs_root *root,
3300 struct btrfs_path *path, int level)
3301{
3302 struct btrfs_fs_info *fs_info = root->fs_info;
3303 u64 lower_gen;
3304 struct extent_buffer *lower;
3305 struct extent_buffer *c;
3306 struct extent_buffer *old;
3307 struct btrfs_disk_key lower_key;
3308 int ret;
3309
3310 BUG_ON(path->nodes[level]);
3311 BUG_ON(path->nodes[level-1] != root->node);
3312
3313 lower = path->nodes[level-1];
3314 if (level == 1)
3315 btrfs_item_key(lower, &lower_key, 0);
3316 else
3317 btrfs_node_key(lower, &lower_key, 0);
3318
3319 c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
3320 root->node->start, 0,
3321 BTRFS_NESTING_NEW_ROOT);
3322 if (IS_ERR(c))
3323 return PTR_ERR(c);
3324
3325 root_add_used(root, fs_info->nodesize);
3326
3327 btrfs_set_header_nritems(c, 1);
3328 btrfs_set_node_key(c, &lower_key, 0);
3329 btrfs_set_node_blockptr(c, 0, lower->start);
3330 lower_gen = btrfs_header_generation(lower);
3331 WARN_ON(lower_gen != trans->transid);
3332
3333 btrfs_set_node_ptr_generation(c, 0, lower_gen);
3334
3335 btrfs_mark_buffer_dirty(c);
3336
3337 old = root->node;
3338 ret = tree_mod_log_insert_root(root->node, c, 0);
3339 BUG_ON(ret < 0);
3340 rcu_assign_pointer(root->node, c);
3341
3342
3343 free_extent_buffer(old);
3344
3345 add_root_to_dirty_list(root);
3346 atomic_inc(&c->refs);
3347 path->nodes[level] = c;
3348 path->locks[level] = BTRFS_WRITE_LOCK;
3349 path->slots[level] = 0;
3350 return 0;
3351}
3352
3353
3354
3355
3356
3357
3358
3359
3360static void insert_ptr(struct btrfs_trans_handle *trans,
3361 struct btrfs_path *path,
3362 struct btrfs_disk_key *key, u64 bytenr,
3363 int slot, int level)
3364{
3365 struct extent_buffer *lower;
3366 int nritems;
3367 int ret;
3368
3369 BUG_ON(!path->nodes[level]);
3370 btrfs_assert_tree_locked(path->nodes[level]);
3371 lower = path->nodes[level];
3372 nritems = btrfs_header_nritems(lower);
3373 BUG_ON(slot > nritems);
3374 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
3375 if (slot != nritems) {
3376 if (level) {
3377 ret = tree_mod_log_insert_move(lower, slot + 1, slot,
3378 nritems - slot);
3379 BUG_ON(ret < 0);
3380 }
3381 memmove_extent_buffer(lower,
3382 btrfs_node_key_ptr_offset(slot + 1),
3383 btrfs_node_key_ptr_offset(slot),
3384 (nritems - slot) * sizeof(struct btrfs_key_ptr));
3385 }
3386 if (level) {
3387 ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
3388 GFP_NOFS);
3389 BUG_ON(ret < 0);
3390 }
3391 btrfs_set_node_key(lower, key, slot);
3392 btrfs_set_node_blockptr(lower, slot, bytenr);
3393 WARN_ON(trans->transid == 0);
3394 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
3395 btrfs_set_header_nritems(lower, nritems + 1);
3396 btrfs_mark_buffer_dirty(lower);
3397}
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408static noinline int split_node(struct btrfs_trans_handle *trans,
3409 struct btrfs_root *root,
3410 struct btrfs_path *path, int level)
3411{
3412 struct btrfs_fs_info *fs_info = root->fs_info;
3413 struct extent_buffer *c;
3414 struct extent_buffer *split;
3415 struct btrfs_disk_key disk_key;
3416 int mid;
3417 int ret;
3418 u32 c_nritems;
3419
3420 c = path->nodes[level];
3421 WARN_ON(btrfs_header_generation(c) != trans->transid);
3422 if (c == root->node) {
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433 ret = insert_new_root(trans, root, path, level + 1);
3434 if (ret)
3435 return ret;
3436 } else {
3437 ret = push_nodes_for_insert(trans, root, path, level);
3438 c = path->nodes[level];
3439 if (!ret && btrfs_header_nritems(c) <
3440 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
3441 return 0;
3442 if (ret < 0)
3443 return ret;
3444 }
3445
3446 c_nritems = btrfs_header_nritems(c);
3447 mid = (c_nritems + 1) / 2;
3448 btrfs_node_key(c, &disk_key, mid);
3449
3450 split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
3451 c->start, 0, BTRFS_NESTING_SPLIT);
3452 if (IS_ERR(split))
3453 return PTR_ERR(split);
3454
3455 root_add_used(root, fs_info->nodesize);
3456 ASSERT(btrfs_header_level(c) == level);
3457
3458 ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
3459 if (ret) {
3460 btrfs_abort_transaction(trans, ret);
3461 return ret;
3462 }
3463 copy_extent_buffer(split, c,
3464 btrfs_node_key_ptr_offset(0),
3465 btrfs_node_key_ptr_offset(mid),
3466 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
3467 btrfs_set_header_nritems(split, c_nritems - mid);
3468 btrfs_set_header_nritems(c, mid);
3469
3470 btrfs_mark_buffer_dirty(c);
3471 btrfs_mark_buffer_dirty(split);
3472
3473 insert_ptr(trans, path, &disk_key, split->start,
3474 path->slots[level + 1] + 1, level + 1);
3475
3476 if (path->slots[level] >= mid) {
3477 path->slots[level] -= mid;
3478 btrfs_tree_unlock(c);
3479 free_extent_buffer(c);
3480 path->nodes[level] = split;
3481 path->slots[level + 1] += 1;
3482 } else {
3483 btrfs_tree_unlock(split);
3484 free_extent_buffer(split);
3485 }
3486 return 0;
3487}
3488
3489
3490
3491
3492
3493
3494static int leaf_space_used(struct extent_buffer *l, int start, int nr)
3495{
3496 struct btrfs_item *start_item;
3497 struct btrfs_item *end_item;
3498 int data_len;
3499 int nritems = btrfs_header_nritems(l);
3500 int end = min(nritems, start + nr) - 1;
3501
3502 if (!nr)
3503 return 0;
3504 start_item = btrfs_item_nr(start);
3505 end_item = btrfs_item_nr(end);
3506 data_len = btrfs_item_offset(l, start_item) +
3507 btrfs_item_size(l, start_item);
3508 data_len = data_len - btrfs_item_offset(l, end_item);
3509 data_len += sizeof(struct btrfs_item) * nr;
3510 WARN_ON(data_len < 0);
3511 return data_len;
3512}
3513
3514
3515
3516
3517
3518
3519noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
3520{
3521 struct btrfs_fs_info *fs_info = leaf->fs_info;
3522 int nritems = btrfs_header_nritems(leaf);
3523 int ret;
3524
3525 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
3526 if (ret < 0) {
3527 btrfs_crit(fs_info,
3528 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
3529 ret,
3530 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
3531 leaf_space_used(leaf, 0, nritems), nritems);
3532 }
3533 return ret;
3534}
3535
3536
3537
3538
3539
3540static noinline int __push_leaf_right(struct btrfs_path *path,
3541 int data_size, int empty,
3542 struct extent_buffer *right,
3543 int free_space, u32 left_nritems,
3544 u32 min_slot)
3545{
3546 struct btrfs_fs_info *fs_info = right->fs_info;
3547 struct extent_buffer *left = path->nodes[0];
3548 struct extent_buffer *upper = path->nodes[1];
3549 struct btrfs_map_token token;
3550 struct btrfs_disk_key disk_key;
3551 int slot;
3552 u32 i;
3553 int push_space = 0;
3554 int push_items = 0;
3555 struct btrfs_item *item;
3556 u32 nr;
3557 u32 right_nritems;
3558 u32 data_end;
3559 u32 this_item_size;
3560
3561 if (empty)
3562 nr = 0;
3563 else
3564 nr = max_t(u32, 1, min_slot);
3565
3566 if (path->slots[0] >= left_nritems)
3567 push_space += data_size;
3568
3569 slot = path->slots[1];
3570 i = left_nritems - 1;
3571 while (i >= nr) {
3572 item = btrfs_item_nr(i);
3573
3574 if (!empty && push_items > 0) {
3575 if (path->slots[0] > i)
3576 break;
3577 if (path->slots[0] == i) {
3578 int space = btrfs_leaf_free_space(left);
3579
3580 if (space + push_space * 2 > free_space)
3581 break;
3582 }
3583 }
3584
3585 if (path->slots[0] == i)
3586 push_space += data_size;
3587
3588 this_item_size = btrfs_item_size(left, item);
3589 if (this_item_size + sizeof(*item) + push_space > free_space)
3590 break;
3591
3592 push_items++;
3593 push_space += this_item_size + sizeof(*item);
3594 if (i == 0)
3595 break;
3596 i--;
3597 }
3598
3599 if (push_items == 0)
3600 goto out_unlock;
3601
3602 WARN_ON(!empty && push_items == left_nritems);
3603
3604
3605 right_nritems = btrfs_header_nritems(right);
3606
3607 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
3608 push_space -= leaf_data_end(left);
3609
3610
3611 data_end = leaf_data_end(right);
3612 memmove_extent_buffer(right,
3613 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
3614 BTRFS_LEAF_DATA_OFFSET + data_end,
3615 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
3616
3617
3618 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
3619 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
3620 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
3621 push_space);
3622
3623 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
3624 btrfs_item_nr_offset(0),
3625 right_nritems * sizeof(struct btrfs_item));
3626
3627
3628 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
3629 btrfs_item_nr_offset(left_nritems - push_items),
3630 push_items * sizeof(struct btrfs_item));
3631
3632
3633 btrfs_init_map_token(&token, right);
3634 right_nritems += push_items;
3635 btrfs_set_header_nritems(right, right_nritems);
3636 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
3637 for (i = 0; i < right_nritems; i++) {
3638 item = btrfs_item_nr(i);
3639 push_space -= btrfs_token_item_size(&token, item);
3640 btrfs_set_token_item_offset(&token, item, push_space);
3641 }
3642
3643 left_nritems -= push_items;
3644 btrfs_set_header_nritems(left, left_nritems);
3645
3646 if (left_nritems)
3647 btrfs_mark_buffer_dirty(left);
3648 else
3649 btrfs_clean_tree_block(left);
3650
3651 btrfs_mark_buffer_dirty(right);
3652
3653 btrfs_item_key(right, &disk_key, 0);
3654 btrfs_set_node_key(upper, &disk_key, slot + 1);
3655 btrfs_mark_buffer_dirty(upper);
3656
3657
3658 if (path->slots[0] >= left_nritems) {
3659 path->slots[0] -= left_nritems;
3660 if (btrfs_header_nritems(path->nodes[0]) == 0)
3661 btrfs_clean_tree_block(path->nodes[0]);
3662 btrfs_tree_unlock(path->nodes[0]);
3663 free_extent_buffer(path->nodes[0]);
3664 path->nodes[0] = right;
3665 path->slots[1] += 1;
3666 } else {
3667 btrfs_tree_unlock(right);
3668 free_extent_buffer(right);
3669 }
3670 return 0;
3671
3672out_unlock:
3673 btrfs_tree_unlock(right);
3674 free_extent_buffer(right);
3675 return 1;
3676}
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3689 *root, struct btrfs_path *path,
3690 int min_data_size, int data_size,
3691 int empty, u32 min_slot)
3692{
3693 struct extent_buffer *left = path->nodes[0];
3694 struct extent_buffer *right;
3695 struct extent_buffer *upper;
3696 int slot;
3697 int free_space;
3698 u32 left_nritems;
3699 int ret;
3700
3701 if (!path->nodes[1])
3702 return 1;
3703
3704 slot = path->slots[1];
3705 upper = path->nodes[1];
3706 if (slot >= btrfs_header_nritems(upper) - 1)
3707 return 1;
3708
3709 btrfs_assert_tree_locked(path->nodes[1]);
3710
3711 right = btrfs_read_node_slot(upper, slot + 1);
3712
3713
3714
3715
3716 if (IS_ERR(right))
3717 return 1;
3718
3719 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
3720
3721 free_space = btrfs_leaf_free_space(right);
3722 if (free_space < data_size)
3723 goto out_unlock;
3724
3725
3726 ret = btrfs_cow_block(trans, root, right, upper,
3727 slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
3728 if (ret)
3729 goto out_unlock;
3730
3731 free_space = btrfs_leaf_free_space(right);
3732 if (free_space < data_size)
3733 goto out_unlock;
3734
3735 left_nritems = btrfs_header_nritems(left);
3736 if (left_nritems == 0)
3737 goto out_unlock;
3738
3739 if (check_sibling_keys(left, right)) {
3740 ret = -EUCLEAN;
3741 btrfs_tree_unlock(right);
3742 free_extent_buffer(right);
3743 return ret;
3744 }
3745 if (path->slots[0] == left_nritems && !empty) {
3746
3747
3748
3749
3750 btrfs_tree_unlock(left);
3751 free_extent_buffer(left);
3752 path->nodes[0] = right;
3753 path->slots[0] = 0;
3754 path->slots[1]++;
3755 return 0;
3756 }
3757
3758 return __push_leaf_right(path, min_data_size, empty,
3759 right, free_space, left_nritems, min_slot);
3760out_unlock:
3761 btrfs_tree_unlock(right);
3762 free_extent_buffer(right);
3763 return 1;
3764}
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
3775 int empty, struct extent_buffer *left,
3776 int free_space, u32 right_nritems,
3777 u32 max_slot)
3778{
3779 struct btrfs_fs_info *fs_info = left->fs_info;
3780 struct btrfs_disk_key disk_key;
3781 struct extent_buffer *right = path->nodes[0];
3782 int i;
3783 int push_space = 0;
3784 int push_items = 0;
3785 struct btrfs_item *item;
3786 u32 old_left_nritems;
3787 u32 nr;
3788 int ret = 0;
3789 u32 this_item_size;
3790 u32 old_left_item_size;
3791 struct btrfs_map_token token;
3792
3793 if (empty)
3794 nr = min(right_nritems, max_slot);
3795 else
3796 nr = min(right_nritems - 1, max_slot);
3797
3798 for (i = 0; i < nr; i++) {
3799 item = btrfs_item_nr(i);
3800
3801 if (!empty && push_items > 0) {
3802 if (path->slots[0] < i)
3803 break;
3804 if (path->slots[0] == i) {
3805 int space = btrfs_leaf_free_space(right);
3806
3807 if (space + push_space * 2 > free_space)
3808 break;
3809 }
3810 }
3811
3812 if (path->slots[0] == i)
3813 push_space += data_size;
3814
3815 this_item_size = btrfs_item_size(right, item);
3816 if (this_item_size + sizeof(*item) + push_space > free_space)
3817 break;
3818
3819 push_items++;
3820 push_space += this_item_size + sizeof(*item);
3821 }
3822
3823 if (push_items == 0) {
3824 ret = 1;
3825 goto out;
3826 }
3827 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
3828
3829
3830 copy_extent_buffer(left, right,
3831 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3832 btrfs_item_nr_offset(0),
3833 push_items * sizeof(struct btrfs_item));
3834
3835 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
3836 btrfs_item_offset_nr(right, push_items - 1);
3837
3838 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
3839 leaf_data_end(left) - push_space,
3840 BTRFS_LEAF_DATA_OFFSET +
3841 btrfs_item_offset_nr(right, push_items - 1),
3842 push_space);
3843 old_left_nritems = btrfs_header_nritems(left);
3844 BUG_ON(old_left_nritems <= 0);
3845
3846 btrfs_init_map_token(&token, left);
3847 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
3848 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
3849 u32 ioff;
3850
3851 item = btrfs_item_nr(i);
3852
3853 ioff = btrfs_token_item_offset(&token, item);
3854 btrfs_set_token_item_offset(&token, item,
3855 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
3856 }
3857 btrfs_set_header_nritems(left, old_left_nritems + push_items);
3858
3859
3860 if (push_items > right_nritems)
3861 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
3862 right_nritems);
3863
3864 if (push_items < right_nritems) {
3865 push_space = btrfs_item_offset_nr(right, push_items - 1) -
3866 leaf_data_end(right);
3867 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
3868 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
3869 BTRFS_LEAF_DATA_OFFSET +
3870 leaf_data_end(right), push_space);
3871
3872 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
3873 btrfs_item_nr_offset(push_items),
3874 (btrfs_header_nritems(right) - push_items) *
3875 sizeof(struct btrfs_item));
3876 }
3877
3878 btrfs_init_map_token(&token, right);
3879 right_nritems -= push_items;
3880 btrfs_set_header_nritems(right, right_nritems);
3881 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
3882 for (i = 0; i < right_nritems; i++) {
3883 item = btrfs_item_nr(i);
3884
3885 push_space = push_space - btrfs_token_item_size(&token, item);
3886 btrfs_set_token_item_offset(&token, item, push_space);
3887 }
3888
3889 btrfs_mark_buffer_dirty(left);
3890 if (right_nritems)
3891 btrfs_mark_buffer_dirty(right);
3892 else
3893 btrfs_clean_tree_block(right);
3894
3895 btrfs_item_key(right, &disk_key, 0);
3896 fixup_low_keys(path, &disk_key, 1);
3897
3898
3899 if (path->slots[0] < push_items) {
3900 path->slots[0] += old_left_nritems;
3901 btrfs_tree_unlock(path->nodes[0]);
3902 free_extent_buffer(path->nodes[0]);
3903 path->nodes[0] = left;
3904 path->slots[1] -= 1;
3905 } else {
3906 btrfs_tree_unlock(left);
3907 free_extent_buffer(left);
3908 path->slots[0] -= push_items;
3909 }
3910 BUG_ON(path->slots[0] < 0);
3911 return ret;
3912out:
3913 btrfs_tree_unlock(left);
3914 free_extent_buffer(left);
3915 return ret;
3916}
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3927 *root, struct btrfs_path *path, int min_data_size,
3928 int data_size, int empty, u32 max_slot)
3929{
3930 struct extent_buffer *right = path->nodes[0];
3931 struct extent_buffer *left;
3932 int slot;
3933 int free_space;
3934 u32 right_nritems;
3935 int ret = 0;
3936
3937 slot = path->slots[1];
3938 if (slot == 0)
3939 return 1;
3940 if (!path->nodes[1])
3941 return 1;
3942
3943 right_nritems = btrfs_header_nritems(right);
3944 if (right_nritems == 0)
3945 return 1;
3946
3947 btrfs_assert_tree_locked(path->nodes[1]);
3948
3949 left = btrfs_read_node_slot(path->nodes[1], slot - 1);
3950
3951
3952
3953
3954 if (IS_ERR(left))
3955 return 1;
3956
3957 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
3958
3959 free_space = btrfs_leaf_free_space(left);
3960 if (free_space < data_size) {
3961 ret = 1;
3962 goto out;
3963 }
3964
3965
3966 ret = btrfs_cow_block(trans, root, left,
3967 path->nodes[1], slot - 1, &left,
3968 BTRFS_NESTING_LEFT_COW);
3969 if (ret) {
3970
3971 if (ret == -ENOSPC)
3972 ret = 1;
3973 goto out;
3974 }
3975
3976 free_space = btrfs_leaf_free_space(left);
3977 if (free_space < data_size) {
3978 ret = 1;
3979 goto out;
3980 }
3981
3982 if (check_sibling_keys(left, right)) {
3983 ret = -EUCLEAN;
3984 goto out;
3985 }
3986 return __push_leaf_left(path, min_data_size,
3987 empty, left, free_space, right_nritems,
3988 max_slot);
3989out:
3990 btrfs_tree_unlock(left);
3991 free_extent_buffer(left);
3992 return ret;
3993}
3994
3995
3996
3997
3998
3999static noinline void copy_for_split(struct btrfs_trans_handle *trans,
4000 struct btrfs_path *path,
4001 struct extent_buffer *l,
4002 struct extent_buffer *right,
4003 int slot, int mid, int nritems)
4004{
4005 struct btrfs_fs_info *fs_info = trans->fs_info;
4006 int data_copy_size;
4007 int rt_data_off;
4008 int i;
4009 struct btrfs_disk_key disk_key;
4010 struct btrfs_map_token token;
4011
4012 nritems = nritems - mid;
4013 btrfs_set_header_nritems(right, nritems);
4014 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
4015
4016 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
4017 btrfs_item_nr_offset(mid),
4018 nritems * sizeof(struct btrfs_item));
4019
4020 copy_extent_buffer(right, l,
4021 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
4022 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
4023 leaf_data_end(l), data_copy_size);
4024
4025 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
4026
4027 btrfs_init_map_token(&token, right);
4028 for (i = 0; i < nritems; i++) {
4029 struct btrfs_item *item = btrfs_item_nr(i);
4030 u32 ioff;
4031
4032 ioff = btrfs_token_item_offset(&token, item);
4033 btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
4034 }
4035
4036 btrfs_set_header_nritems(l, mid);
4037 btrfs_item_key(right, &disk_key, 0);
4038 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
4039
4040 btrfs_mark_buffer_dirty(right);
4041 btrfs_mark_buffer_dirty(l);
4042 BUG_ON(path->slots[0] != slot);
4043
4044 if (mid <= slot) {
4045 btrfs_tree_unlock(path->nodes[0]);
4046 free_extent_buffer(path->nodes[0]);
4047 path->nodes[0] = right;
4048 path->slots[0] -= mid;
4049 path->slots[1] += 1;
4050 } else {
4051 btrfs_tree_unlock(right);
4052 free_extent_buffer(right);
4053 }
4054
4055 BUG_ON(path->slots[0] < 0);
4056}
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4069 struct btrfs_root *root,
4070 struct btrfs_path *path,
4071 int data_size)
4072{
4073 int ret;
4074 int progress = 0;
4075 int slot;
4076 u32 nritems;
4077 int space_needed = data_size;
4078
4079 slot = path->slots[0];
4080 if (slot < btrfs_header_nritems(path->nodes[0]))
4081 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
4082
4083
4084
4085
4086
4087 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
4088 if (ret < 0)
4089 return ret;
4090
4091 if (ret == 0)
4092 progress++;
4093
4094 nritems = btrfs_header_nritems(path->nodes[0]);
4095
4096
4097
4098
4099 if (path->slots[0] == 0 || path->slots[0] == nritems)
4100 return 0;
4101
4102 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
4103 return 0;
4104
4105
4106 slot = path->slots[0];
4107 space_needed = data_size;
4108 if (slot > 0)
4109 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
4110 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
4111 if (ret < 0)
4112 return ret;
4113
4114 if (ret == 0)
4115 progress++;
4116
4117 if (progress)
4118 return 0;
4119 return 1;
4120}
4121
4122
4123
4124
4125
4126
4127
4128static noinline int split_leaf(struct btrfs_trans_handle *trans,
4129 struct btrfs_root *root,
4130 const struct btrfs_key *ins_key,
4131 struct btrfs_path *path, int data_size,
4132 int extend)
4133{
4134 struct btrfs_disk_key disk_key;
4135 struct extent_buffer *l;
4136 u32 nritems;
4137 int mid;
4138 int slot;
4139 struct extent_buffer *right;
4140 struct btrfs_fs_info *fs_info = root->fs_info;
4141 int ret = 0;
4142 int wret;
4143 int split;
4144 int num_doubles = 0;
4145 int tried_avoid_double = 0;
4146
4147 l = path->nodes[0];
4148 slot = path->slots[0];
4149 if (extend && data_size + btrfs_item_size_nr(l, slot) +
4150 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
4151 return -EOVERFLOW;
4152
4153
4154 if (data_size && path->nodes[1]) {
4155 int space_needed = data_size;
4156
4157 if (slot < btrfs_header_nritems(l))
4158 space_needed -= btrfs_leaf_free_space(l);
4159
4160 wret = push_leaf_right(trans, root, path, space_needed,
4161 space_needed, 0, 0);
4162 if (wret < 0)
4163 return wret;
4164 if (wret) {
4165 space_needed = data_size;
4166 if (slot > 0)
4167 space_needed -= btrfs_leaf_free_space(l);
4168 wret = push_leaf_left(trans, root, path, space_needed,
4169 space_needed, 0, (u32)-1);
4170 if (wret < 0)
4171 return wret;
4172 }
4173 l = path->nodes[0];
4174
4175
4176 if (btrfs_leaf_free_space(l) >= data_size)
4177 return 0;
4178 }
4179
4180 if (!path->nodes[1]) {
4181 ret = insert_new_root(trans, root, path, 1);
4182 if (ret)
4183 return ret;
4184 }
4185again:
4186 split = 1;
4187 l = path->nodes[0];
4188 slot = path->slots[0];
4189 nritems = btrfs_header_nritems(l);
4190 mid = (nritems + 1) / 2;
4191
4192 if (mid <= slot) {
4193 if (nritems == 1 ||
4194 leaf_space_used(l, mid, nritems - mid) + data_size >
4195 BTRFS_LEAF_DATA_SIZE(fs_info)) {
4196 if (slot >= nritems) {
4197 split = 0;
4198 } else {
4199 mid = slot;
4200 if (mid != nritems &&
4201 leaf_space_used(l, mid, nritems - mid) +
4202 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
4203 if (data_size && !tried_avoid_double)
4204 goto push_for_double;
4205 split = 2;
4206 }
4207 }
4208 }
4209 } else {
4210 if (leaf_space_used(l, 0, mid) + data_size >
4211 BTRFS_LEAF_DATA_SIZE(fs_info)) {
4212 if (!extend && data_size && slot == 0) {
4213 split = 0;
4214 } else if ((extend || !data_size) && slot == 0) {
4215 mid = 1;
4216 } else {
4217 mid = slot;
4218 if (mid != nritems &&
4219 leaf_space_used(l, mid, nritems - mid) +
4220 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
4221 if (data_size && !tried_avoid_double)
4222 goto push_for_double;
4223 split = 2;
4224 }
4225 }
4226 }
4227 }
4228
4229 if (split == 0)
4230 btrfs_cpu_key_to_disk(&disk_key, ins_key);
4231 else
4232 btrfs_item_key(l, &disk_key, mid);
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242 right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
4243 l->start, 0, num_doubles ?
4244 BTRFS_NESTING_NEW_ROOT :
4245 BTRFS_NESTING_SPLIT);
4246 if (IS_ERR(right))
4247 return PTR_ERR(right);
4248
4249 root_add_used(root, fs_info->nodesize);
4250
4251 if (split == 0) {
4252 if (mid <= slot) {
4253 btrfs_set_header_nritems(right, 0);
4254 insert_ptr(trans, path, &disk_key,
4255 right->start, path->slots[1] + 1, 1);
4256 btrfs_tree_unlock(path->nodes[0]);
4257 free_extent_buffer(path->nodes[0]);
4258 path->nodes[0] = right;
4259 path->slots[0] = 0;
4260 path->slots[1] += 1;
4261 } else {
4262 btrfs_set_header_nritems(right, 0);
4263 insert_ptr(trans, path, &disk_key,
4264 right->start, path->slots[1], 1);
4265 btrfs_tree_unlock(path->nodes[0]);
4266 free_extent_buffer(path->nodes[0]);
4267 path->nodes[0] = right;
4268 path->slots[0] = 0;
4269 if (path->slots[1] == 0)
4270 fixup_low_keys(path, &disk_key, 1);
4271 }
4272
4273
4274
4275
4276
4277 return ret;
4278 }
4279
4280 copy_for_split(trans, path, l, right, slot, mid, nritems);
4281
4282 if (split == 2) {
4283 BUG_ON(num_doubles != 0);
4284 num_doubles++;
4285 goto again;
4286 }
4287
4288 return 0;
4289
4290push_for_double:
4291 push_for_double_split(trans, root, path, data_size);
4292 tried_avoid_double = 1;
4293 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
4294 return 0;
4295 goto again;
4296}
4297
4298static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4299 struct btrfs_root *root,
4300 struct btrfs_path *path, int ins_len)
4301{
4302 struct btrfs_key key;
4303 struct extent_buffer *leaf;
4304 struct btrfs_file_extent_item *fi;
4305 u64 extent_len = 0;
4306 u32 item_size;
4307 int ret;
4308
4309 leaf = path->nodes[0];
4310 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4311
4312 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
4313 key.type != BTRFS_EXTENT_CSUM_KEY);
4314
4315 if (btrfs_leaf_free_space(leaf) >= ins_len)
4316 return 0;
4317
4318 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4319 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4320 fi = btrfs_item_ptr(leaf, path->slots[0],
4321 struct btrfs_file_extent_item);
4322 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
4323 }
4324 btrfs_release_path(path);
4325
4326 path->keep_locks = 1;
4327 path->search_for_split = 1;
4328 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4329 path->search_for_split = 0;
4330 if (ret > 0)
4331 ret = -EAGAIN;
4332 if (ret < 0)
4333 goto err;
4334
4335 ret = -EAGAIN;
4336 leaf = path->nodes[0];
4337
4338 if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
4339 goto err;
4340
4341
4342 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
4343 goto err;
4344
4345 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4346 fi = btrfs_item_ptr(leaf, path->slots[0],
4347 struct btrfs_file_extent_item);
4348 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
4349 goto err;
4350 }
4351
4352 ret = split_leaf(trans, root, &key, path, ins_len, 1);
4353 if (ret)
4354 goto err;
4355
4356 path->keep_locks = 0;
4357 btrfs_unlock_up_safe(path, 1);
4358 return 0;
4359err:
4360 path->keep_locks = 0;
4361 return ret;
4362}
4363
4364static noinline int split_item(struct btrfs_path *path,
4365 const struct btrfs_key *new_key,
4366 unsigned long split_offset)
4367{
4368 struct extent_buffer *leaf;
4369 struct btrfs_item *item;
4370 struct btrfs_item *new_item;
4371 int slot;
4372 char *buf;
4373 u32 nritems;
4374 u32 item_size;
4375 u32 orig_offset;
4376 struct btrfs_disk_key disk_key;
4377
4378 leaf = path->nodes[0];
4379 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
4380
4381 item = btrfs_item_nr(path->slots[0]);
4382 orig_offset = btrfs_item_offset(leaf, item);
4383 item_size = btrfs_item_size(leaf, item);
4384
4385 buf = kmalloc(item_size, GFP_NOFS);
4386 if (!buf)
4387 return -ENOMEM;
4388
4389 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
4390 path->slots[0]), item_size);
4391
4392 slot = path->slots[0] + 1;
4393 nritems = btrfs_header_nritems(leaf);
4394 if (slot != nritems) {
4395
4396 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
4397 btrfs_item_nr_offset(slot),
4398 (nritems - slot) * sizeof(struct btrfs_item));
4399 }
4400
4401 btrfs_cpu_key_to_disk(&disk_key, new_key);
4402 btrfs_set_item_key(leaf, &disk_key, slot);
4403
4404 new_item = btrfs_item_nr(slot);
4405
4406 btrfs_set_item_offset(leaf, new_item, orig_offset);
4407 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
4408
4409 btrfs_set_item_offset(leaf, item,
4410 orig_offset + item_size - split_offset);
4411 btrfs_set_item_size(leaf, item, split_offset);
4412
4413 btrfs_set_header_nritems(leaf, nritems + 1);
4414
4415
4416 write_extent_buffer(leaf, buf,
4417 btrfs_item_ptr_offset(leaf, path->slots[0]),
4418 split_offset);
4419
4420
4421 write_extent_buffer(leaf, buf + split_offset,
4422 btrfs_item_ptr_offset(leaf, slot),
4423 item_size - split_offset);
4424 btrfs_mark_buffer_dirty(leaf);
4425
4426 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
4427 kfree(buf);
4428 return 0;
4429}
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446int btrfs_split_item(struct btrfs_trans_handle *trans,
4447 struct btrfs_root *root,
4448 struct btrfs_path *path,
4449 const struct btrfs_key *new_key,
4450 unsigned long split_offset)
4451{
4452 int ret;
4453 ret = setup_leaf_for_split(trans, root, path,
4454 sizeof(struct btrfs_item));
4455 if (ret)
4456 return ret;
4457
4458 ret = split_item(path, new_key, split_offset);
4459 return ret;
4460}
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4471 struct btrfs_root *root,
4472 struct btrfs_path *path,
4473 const struct btrfs_key *new_key)
4474{
4475 struct extent_buffer *leaf;
4476 int ret;
4477 u32 item_size;
4478
4479 leaf = path->nodes[0];
4480 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4481 ret = setup_leaf_for_split(trans, root, path,
4482 item_size + sizeof(struct btrfs_item));
4483 if (ret)
4484 return ret;
4485
4486 path->slots[0]++;
4487 setup_items_for_insert(root, path, new_key, &item_size, 1);
4488 leaf = path->nodes[0];
4489 memcpy_extent_buffer(leaf,
4490 btrfs_item_ptr_offset(leaf, path->slots[0]),
4491 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4492 item_size);
4493 return 0;
4494}
4495
4496
4497
4498
4499
4500
4501
4502void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
4503{
4504 int slot;
4505 struct extent_buffer *leaf;
4506 struct btrfs_item *item;
4507 u32 nritems;
4508 unsigned int data_end;
4509 unsigned int old_data_start;
4510 unsigned int old_size;
4511 unsigned int size_diff;
4512 int i;
4513 struct btrfs_map_token token;
4514
4515 leaf = path->nodes[0];
4516 slot = path->slots[0];
4517
4518 old_size = btrfs_item_size_nr(leaf, slot);
4519 if (old_size == new_size)
4520 return;
4521
4522 nritems = btrfs_header_nritems(leaf);
4523 data_end = leaf_data_end(leaf);
4524
4525 old_data_start = btrfs_item_offset_nr(leaf, slot);
4526
4527 size_diff = old_size - new_size;
4528
4529 BUG_ON(slot < 0);
4530 BUG_ON(slot >= nritems);
4531
4532
4533
4534
4535
4536 btrfs_init_map_token(&token, leaf);
4537 for (i = slot; i < nritems; i++) {
4538 u32 ioff;
4539 item = btrfs_item_nr(i);
4540
4541 ioff = btrfs_token_item_offset(&token, item);
4542 btrfs_set_token_item_offset(&token, item, ioff + size_diff);
4543 }
4544
4545
4546 if (from_end) {
4547 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4548 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
4549 data_end, old_data_start + new_size - data_end);
4550 } else {
4551 struct btrfs_disk_key disk_key;
4552 u64 offset;
4553
4554 btrfs_item_key(leaf, &disk_key, slot);
4555
4556 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
4557 unsigned long ptr;
4558 struct btrfs_file_extent_item *fi;
4559
4560 fi = btrfs_item_ptr(leaf, slot,
4561 struct btrfs_file_extent_item);
4562 fi = (struct btrfs_file_extent_item *)(
4563 (unsigned long)fi - size_diff);
4564
4565 if (btrfs_file_extent_type(leaf, fi) ==
4566 BTRFS_FILE_EXTENT_INLINE) {
4567 ptr = btrfs_item_ptr_offset(leaf, slot);
4568 memmove_extent_buffer(leaf, ptr,
4569 (unsigned long)fi,
4570 BTRFS_FILE_EXTENT_INLINE_DATA_START);
4571 }
4572 }
4573
4574 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4575 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
4576 data_end, old_data_start - data_end);
4577
4578 offset = btrfs_disk_key_offset(&disk_key);
4579 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4580 btrfs_set_item_key(leaf, &disk_key, slot);
4581 if (slot == 0)
4582 fixup_low_keys(path, &disk_key, 1);
4583 }
4584
4585 item = btrfs_item_nr(slot);
4586 btrfs_set_item_size(leaf, item, new_size);
4587 btrfs_mark_buffer_dirty(leaf);
4588
4589 if (btrfs_leaf_free_space(leaf) < 0) {
4590 btrfs_print_leaf(leaf);
4591 BUG();
4592 }
4593}
4594
4595
4596
4597
4598void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
4599{
4600 int slot;
4601 struct extent_buffer *leaf;
4602 struct btrfs_item *item;
4603 u32 nritems;
4604 unsigned int data_end;
4605 unsigned int old_data;
4606 unsigned int old_size;
4607 int i;
4608 struct btrfs_map_token token;
4609
4610 leaf = path->nodes[0];
4611
4612 nritems = btrfs_header_nritems(leaf);
4613 data_end = leaf_data_end(leaf);
4614
4615 if (btrfs_leaf_free_space(leaf) < data_size) {
4616 btrfs_print_leaf(leaf);
4617 BUG();
4618 }
4619 slot = path->slots[0];
4620 old_data = btrfs_item_end_nr(leaf, slot);
4621
4622 BUG_ON(slot < 0);
4623 if (slot >= nritems) {
4624 btrfs_print_leaf(leaf);
4625 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
4626 slot, nritems);
4627 BUG();
4628 }
4629
4630
4631
4632
4633
4634 btrfs_init_map_token(&token, leaf);
4635 for (i = slot; i < nritems; i++) {
4636 u32 ioff;
4637 item = btrfs_item_nr(i);
4638
4639 ioff = btrfs_token_item_offset(&token, item);
4640 btrfs_set_token_item_offset(&token, item, ioff - data_size);
4641 }
4642
4643
4644 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4645 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
4646 data_end, old_data - data_end);
4647
4648 data_end = old_data;
4649 old_size = btrfs_item_size_nr(leaf, slot);
4650 item = btrfs_item_nr(slot);
4651 btrfs_set_item_size(leaf, item, old_size + data_size);
4652 btrfs_mark_buffer_dirty(leaf);
4653
4654 if (btrfs_leaf_free_space(leaf) < 0) {
4655 btrfs_print_leaf(leaf);
4656 BUG();
4657 }
4658}
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4672 const struct btrfs_key *cpu_key, u32 *data_size,
4673 int nr)
4674{
4675 struct btrfs_fs_info *fs_info = root->fs_info;
4676 struct btrfs_item *item;
4677 int i;
4678 u32 nritems;
4679 unsigned int data_end;
4680 struct btrfs_disk_key disk_key;
4681 struct extent_buffer *leaf;
4682 int slot;
4683 struct btrfs_map_token token;
4684 u32 total_size;
4685 u32 total_data = 0;
4686
4687 for (i = 0; i < nr; i++)
4688 total_data += data_size[i];
4689 total_size = total_data + (nr * sizeof(struct btrfs_item));
4690
4691 if (path->slots[0] == 0) {
4692 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4693 fixup_low_keys(path, &disk_key, 1);
4694 }
4695 btrfs_unlock_up_safe(path, 1);
4696
4697 leaf = path->nodes[0];
4698 slot = path->slots[0];
4699
4700 nritems = btrfs_header_nritems(leaf);
4701 data_end = leaf_data_end(leaf);
4702
4703 if (btrfs_leaf_free_space(leaf) < total_size) {
4704 btrfs_print_leaf(leaf);
4705 btrfs_crit(fs_info, "not enough freespace need %u have %d",
4706 total_size, btrfs_leaf_free_space(leaf));
4707 BUG();
4708 }
4709
4710 btrfs_init_map_token(&token, leaf);
4711 if (slot != nritems) {
4712 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
4713
4714 if (old_data < data_end) {
4715 btrfs_print_leaf(leaf);
4716 btrfs_crit(fs_info,
4717 "item at slot %d with data offset %u beyond data end of leaf %u",
4718 slot, old_data, data_end);
4719 BUG();
4720 }
4721
4722
4723
4724
4725 for (i = slot; i < nritems; i++) {
4726 u32 ioff;
4727
4728 item = btrfs_item_nr(i);
4729 ioff = btrfs_token_item_offset(&token, item);
4730 btrfs_set_token_item_offset(&token, item,
4731 ioff - total_data);
4732 }
4733
4734 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
4735 btrfs_item_nr_offset(slot),
4736 (nritems - slot) * sizeof(struct btrfs_item));
4737
4738
4739 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4740 data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
4741 data_end, old_data - data_end);
4742 data_end = old_data;
4743 }
4744
4745
4746 for (i = 0; i < nr; i++) {
4747 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4748 btrfs_set_item_key(leaf, &disk_key, slot + i);
4749 item = btrfs_item_nr(slot + i);
4750 data_end -= data_size[i];
4751 btrfs_set_token_item_offset(&token, item, data_end);
4752 btrfs_set_token_item_size(&token, item, data_size[i]);
4753 }
4754
4755 btrfs_set_header_nritems(leaf, nritems + nr);
4756 btrfs_mark_buffer_dirty(leaf);
4757
4758 if (btrfs_leaf_free_space(leaf) < 0) {
4759 btrfs_print_leaf(leaf);
4760 BUG();
4761 }
4762}
4763
4764
4765
4766
4767
4768int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4769 struct btrfs_root *root,
4770 struct btrfs_path *path,
4771 const struct btrfs_key *cpu_key, u32 *data_size,
4772 int nr)
4773{
4774 int ret = 0;
4775 int slot;
4776 int i;
4777 u32 total_size = 0;
4778 u32 total_data = 0;
4779
4780 for (i = 0; i < nr; i++)
4781 total_data += data_size[i];
4782
4783 total_size = total_data + (nr * sizeof(struct btrfs_item));
4784 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
4785 if (ret == 0)
4786 return -EEXIST;
4787 if (ret < 0)
4788 return ret;
4789
4790 slot = path->slots[0];
4791 BUG_ON(slot < 0);
4792
4793 setup_items_for_insert(root, path, cpu_key, data_size, nr);
4794 return 0;
4795}
4796
4797
4798
4799
4800
4801int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4802 const struct btrfs_key *cpu_key, void *data,
4803 u32 data_size)
4804{
4805 int ret = 0;
4806 struct btrfs_path *path;
4807 struct extent_buffer *leaf;
4808 unsigned long ptr;
4809
4810 path = btrfs_alloc_path();
4811 if (!path)
4812 return -ENOMEM;
4813 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
4814 if (!ret) {
4815 leaf = path->nodes[0];
4816 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4817 write_extent_buffer(leaf, data, ptr, data_size);
4818 btrfs_mark_buffer_dirty(leaf);
4819 }
4820 btrfs_free_path(path);
4821 return ret;
4822}
4823
4824
4825
4826
4827
4828
4829
4830static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4831 int level, int slot)
4832{
4833 struct extent_buffer *parent = path->nodes[level];
4834 u32 nritems;
4835 int ret;
4836
4837 nritems = btrfs_header_nritems(parent);
4838 if (slot != nritems - 1) {
4839 if (level) {
4840 ret = tree_mod_log_insert_move(parent, slot, slot + 1,
4841 nritems - slot - 1);
4842 BUG_ON(ret < 0);
4843 }
4844 memmove_extent_buffer(parent,
4845 btrfs_node_key_ptr_offset(slot),
4846 btrfs_node_key_ptr_offset(slot + 1),
4847 sizeof(struct btrfs_key_ptr) *
4848 (nritems - slot - 1));
4849 } else if (level) {
4850 ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
4851 GFP_NOFS);
4852 BUG_ON(ret < 0);
4853 }
4854
4855 nritems--;
4856 btrfs_set_header_nritems(parent, nritems);
4857 if (nritems == 0 && parent == root->node) {
4858 BUG_ON(btrfs_header_level(root->node) != 1);
4859
4860 btrfs_set_header_level(root->node, 0);
4861 } else if (slot == 0) {
4862 struct btrfs_disk_key disk_key;
4863
4864 btrfs_node_key(parent, &disk_key, 0);
4865 fixup_low_keys(path, &disk_key, level + 1);
4866 }
4867 btrfs_mark_buffer_dirty(parent);
4868}
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4881 struct btrfs_root *root,
4882 struct btrfs_path *path,
4883 struct extent_buffer *leaf)
4884{
4885 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4886 del_ptr(root, path, 1, path->slots[1]);
4887
4888
4889
4890
4891
4892 btrfs_unlock_up_safe(path, 0);
4893
4894 root_sub_used(root, leaf->len);
4895
4896 atomic_inc(&leaf->refs);
4897 btrfs_free_tree_block(trans, root, leaf, 0, 1);
4898 free_extent_buffer_stale(leaf);
4899}
4900
4901
4902
4903
4904int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4905 struct btrfs_path *path, int slot, int nr)
4906{
4907 struct btrfs_fs_info *fs_info = root->fs_info;
4908 struct extent_buffer *leaf;
4909 struct btrfs_item *item;
4910 u32 last_off;
4911 u32 dsize = 0;
4912 int ret = 0;
4913 int wret;
4914 int i;
4915 u32 nritems;
4916
4917 leaf = path->nodes[0];
4918 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
4919
4920 for (i = 0; i < nr; i++)
4921 dsize += btrfs_item_size_nr(leaf, slot + i);
4922
4923 nritems = btrfs_header_nritems(leaf);
4924
4925 if (slot + nr != nritems) {
4926 int data_end = leaf_data_end(leaf);
4927 struct btrfs_map_token token;
4928
4929 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4930 data_end + dsize,
4931 BTRFS_LEAF_DATA_OFFSET + data_end,
4932 last_off - data_end);
4933
4934 btrfs_init_map_token(&token, leaf);
4935 for (i = slot + nr; i < nritems; i++) {
4936 u32 ioff;
4937
4938 item = btrfs_item_nr(i);
4939 ioff = btrfs_token_item_offset(&token, item);
4940 btrfs_set_token_item_offset(&token, item, ioff + dsize);
4941 }
4942
4943 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
4944 btrfs_item_nr_offset(slot + nr),
4945 sizeof(struct btrfs_item) *
4946 (nritems - slot - nr));
4947 }
4948 btrfs_set_header_nritems(leaf, nritems - nr);
4949 nritems -= nr;
4950
4951
4952 if (nritems == 0) {
4953 if (leaf == root->node) {
4954 btrfs_set_header_level(leaf, 0);
4955 } else {
4956 btrfs_clean_tree_block(leaf);
4957 btrfs_del_leaf(trans, root, path, leaf);
4958 }
4959 } else {
4960 int used = leaf_space_used(leaf, 0, nritems);
4961 if (slot == 0) {
4962 struct btrfs_disk_key disk_key;
4963
4964 btrfs_item_key(leaf, &disk_key, 0);
4965 fixup_low_keys(path, &disk_key, 1);
4966 }
4967
4968
4969 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
4970
4971
4972
4973
4974 slot = path->slots[1];
4975 atomic_inc(&leaf->refs);
4976
4977 wret = push_leaf_left(trans, root, path, 1, 1,
4978 1, (u32)-1);
4979 if (wret < 0 && wret != -ENOSPC)
4980 ret = wret;
4981
4982 if (path->nodes[0] == leaf &&
4983 btrfs_header_nritems(leaf)) {
4984 wret = push_leaf_right(trans, root, path, 1,
4985 1, 1, 0);
4986 if (wret < 0 && wret != -ENOSPC)
4987 ret = wret;
4988 }
4989
4990 if (btrfs_header_nritems(leaf) == 0) {
4991 path->slots[1] = slot;
4992 btrfs_del_leaf(trans, root, path, leaf);
4993 free_extent_buffer(leaf);
4994 ret = 0;
4995 } else {
4996
4997
4998
4999
5000
5001 if (path->nodes[0] == leaf)
5002 btrfs_mark_buffer_dirty(leaf);
5003 free_extent_buffer(leaf);
5004 }
5005 } else {
5006 btrfs_mark_buffer_dirty(leaf);
5007 }
5008 }
5009 return ret;
5010}
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
5021{
5022 struct btrfs_key key;
5023 struct btrfs_disk_key found_key;
5024 int ret;
5025
5026 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
5027
5028 if (key.offset > 0) {
5029 key.offset--;
5030 } else if (key.type > 0) {
5031 key.type--;
5032 key.offset = (u64)-1;
5033 } else if (key.objectid > 0) {
5034 key.objectid--;
5035 key.type = (u8)-1;
5036 key.offset = (u64)-1;
5037 } else {
5038 return 1;
5039 }
5040
5041 btrfs_release_path(path);
5042 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5043 if (ret < 0)
5044 return ret;
5045 btrfs_item_key(path->nodes[0], &found_key, 0);
5046 ret = comp_keys(&found_key, &key);
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057 if (ret <= 0)
5058 return 0;
5059 return 1;
5060}
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
5082 struct btrfs_path *path,
5083 u64 min_trans)
5084{
5085 struct extent_buffer *cur;
5086 struct btrfs_key found_key;
5087 int slot;
5088 int sret;
5089 u32 nritems;
5090 int level;
5091 int ret = 1;
5092 int keep_locks = path->keep_locks;
5093
5094 path->keep_locks = 1;
5095again:
5096 cur = btrfs_read_lock_root_node(root);
5097 level = btrfs_header_level(cur);
5098 WARN_ON(path->nodes[level]);
5099 path->nodes[level] = cur;
5100 path->locks[level] = BTRFS_READ_LOCK;
5101
5102 if (btrfs_header_generation(cur) < min_trans) {
5103 ret = 1;
5104 goto out;
5105 }
5106 while (1) {
5107 nritems = btrfs_header_nritems(cur);
5108 level = btrfs_header_level(cur);
5109 sret = btrfs_bin_search(cur, min_key, &slot);
5110 if (sret < 0) {
5111 ret = sret;
5112 goto out;
5113 }
5114
5115
5116 if (level == path->lowest_level) {
5117 if (slot >= nritems)
5118 goto find_next_key;
5119 ret = 0;
5120 path->slots[level] = slot;
5121 btrfs_item_key_to_cpu(cur, &found_key, slot);
5122 goto out;
5123 }
5124 if (sret && slot > 0)
5125 slot--;
5126
5127
5128
5129
5130 while (slot < nritems) {
5131 u64 gen;
5132
5133 gen = btrfs_node_ptr_generation(cur, slot);
5134 if (gen < min_trans) {
5135 slot++;
5136 continue;
5137 }
5138 break;
5139 }
5140find_next_key:
5141
5142
5143
5144
5145 if (slot >= nritems) {
5146 path->slots[level] = slot;
5147 sret = btrfs_find_next_key(root, path, min_key, level,
5148 min_trans);
5149 if (sret == 0) {
5150 btrfs_release_path(path);
5151 goto again;
5152 } else {
5153 goto out;
5154 }
5155 }
5156
5157 btrfs_node_key_to_cpu(cur, &found_key, slot);
5158 path->slots[level] = slot;
5159 if (level == path->lowest_level) {
5160 ret = 0;
5161 goto out;
5162 }
5163 cur = btrfs_read_node_slot(cur, slot);
5164 if (IS_ERR(cur)) {
5165 ret = PTR_ERR(cur);
5166 goto out;
5167 }
5168
5169 btrfs_tree_read_lock(cur);
5170
5171 path->locks[level - 1] = BTRFS_READ_LOCK;
5172 path->nodes[level - 1] = cur;
5173 unlock_up(path, level, 1, 0, NULL);
5174 }
5175out:
5176 path->keep_locks = keep_locks;
5177 if (ret == 0) {
5178 btrfs_unlock_up_safe(path, path->lowest_level + 1);
5179 memcpy(min_key, &found_key, sizeof(found_key));
5180 }
5181 return ret;
5182}
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
5196 struct btrfs_key *key, int level, u64 min_trans)
5197{
5198 int slot;
5199 struct extent_buffer *c;
5200
5201 WARN_ON(!path->keep_locks && !path->skip_locking);
5202 while (level < BTRFS_MAX_LEVEL) {
5203 if (!path->nodes[level])
5204 return 1;
5205
5206 slot = path->slots[level] + 1;
5207 c = path->nodes[level];
5208next:
5209 if (slot >= btrfs_header_nritems(c)) {
5210 int ret;
5211 int orig_lowest;
5212 struct btrfs_key cur_key;
5213 if (level + 1 >= BTRFS_MAX_LEVEL ||
5214 !path->nodes[level + 1])
5215 return 1;
5216
5217 if (path->locks[level + 1] || path->skip_locking) {
5218 level++;
5219 continue;
5220 }
5221
5222 slot = btrfs_header_nritems(c) - 1;
5223 if (level == 0)
5224 btrfs_item_key_to_cpu(c, &cur_key, slot);
5225 else
5226 btrfs_node_key_to_cpu(c, &cur_key, slot);
5227
5228 orig_lowest = path->lowest_level;
5229 btrfs_release_path(path);
5230 path->lowest_level = level;
5231 ret = btrfs_search_slot(NULL, root, &cur_key, path,
5232 0, 0);
5233 path->lowest_level = orig_lowest;
5234 if (ret < 0)
5235 return ret;
5236
5237 c = path->nodes[level];
5238 slot = path->slots[level];
5239 if (ret == 0)
5240 slot++;
5241 goto next;
5242 }
5243
5244 if (level == 0)
5245 btrfs_item_key_to_cpu(c, key, slot);
5246 else {
5247 u64 gen = btrfs_node_ptr_generation(c, slot);
5248
5249 if (gen < min_trans) {
5250 slot++;
5251 goto next;
5252 }
5253 btrfs_node_key_to_cpu(c, key, slot);
5254 }
5255 return 0;
5256 }
5257 return 1;
5258}
5259
5260
5261
5262
5263
5264
5265int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
5266{
5267 return btrfs_next_old_leaf(root, path, 0);
5268}
5269
5270int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5271 u64 time_seq)
5272{
5273 int slot;
5274 int level;
5275 struct extent_buffer *c;
5276 struct extent_buffer *next;
5277 struct btrfs_key key;
5278 u32 nritems;
5279 int ret;
5280 int i;
5281
5282 nritems = btrfs_header_nritems(path->nodes[0]);
5283 if (nritems == 0)
5284 return 1;
5285
5286 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
5287again:
5288 level = 1;
5289 next = NULL;
5290 btrfs_release_path(path);
5291
5292 path->keep_locks = 1;
5293
5294 if (time_seq)
5295 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5296 else
5297 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5298 path->keep_locks = 0;
5299
5300 if (ret < 0)
5301 return ret;
5302
5303 nritems = btrfs_header_nritems(path->nodes[0]);
5304
5305
5306
5307
5308
5309
5310 if (nritems > 0 && path->slots[0] < nritems - 1) {
5311 if (ret == 0)
5312 path->slots[0]++;
5313 ret = 0;
5314 goto done;
5315 }
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
5331 ret = 0;
5332 goto done;
5333 }
5334
5335 while (level < BTRFS_MAX_LEVEL) {
5336 if (!path->nodes[level]) {
5337 ret = 1;
5338 goto done;
5339 }
5340
5341 slot = path->slots[level] + 1;
5342 c = path->nodes[level];
5343 if (slot >= btrfs_header_nritems(c)) {
5344 level++;
5345 if (level == BTRFS_MAX_LEVEL) {
5346 ret = 1;
5347 goto done;
5348 }
5349 continue;
5350 }
5351
5352
5353
5354
5355
5356
5357
5358 for (i = 0; i < level; i++) {
5359 if (path->locks[level]) {
5360 btrfs_tree_read_unlock(path->nodes[i]);
5361 path->locks[i] = 0;
5362 }
5363 free_extent_buffer(path->nodes[i]);
5364 path->nodes[i] = NULL;
5365 }
5366
5367 next = c;
5368 ret = read_block_for_search(root, path, &next, level,
5369 slot, &key);
5370 if (ret == -EAGAIN)
5371 goto again;
5372
5373 if (ret < 0) {
5374 btrfs_release_path(path);
5375 goto done;
5376 }
5377
5378 if (!path->skip_locking) {
5379 ret = btrfs_try_tree_read_lock(next);
5380 if (!ret && time_seq) {
5381
5382
5383
5384
5385
5386
5387
5388 free_extent_buffer(next);
5389 btrfs_release_path(path);
5390 cond_resched();
5391 goto again;
5392 }
5393 if (!ret)
5394 btrfs_tree_read_lock(next);
5395 }
5396 break;
5397 }
5398 path->slots[level] = slot;
5399 while (1) {
5400 level--;
5401 path->nodes[level] = next;
5402 path->slots[level] = 0;
5403 if (!path->skip_locking)
5404 path->locks[level] = BTRFS_READ_LOCK;
5405 if (!level)
5406 break;
5407
5408 ret = read_block_for_search(root, path, &next, level,
5409 0, &key);
5410 if (ret == -EAGAIN)
5411 goto again;
5412
5413 if (ret < 0) {
5414 btrfs_release_path(path);
5415 goto done;
5416 }
5417
5418 if (!path->skip_locking)
5419 btrfs_tree_read_lock(next);
5420 }
5421 ret = 0;
5422done:
5423 unlock_up(path, 0, 1, 0, NULL);
5424
5425 return ret;
5426}
5427
5428
5429
5430
5431
5432
5433
5434int btrfs_previous_item(struct btrfs_root *root,
5435 struct btrfs_path *path, u64 min_objectid,
5436 int type)
5437{
5438 struct btrfs_key found_key;
5439 struct extent_buffer *leaf;
5440 u32 nritems;
5441 int ret;
5442
5443 while (1) {
5444 if (path->slots[0] == 0) {
5445 ret = btrfs_prev_leaf(root, path);
5446 if (ret != 0)
5447 return ret;
5448 } else {
5449 path->slots[0]--;
5450 }
5451 leaf = path->nodes[0];
5452 nritems = btrfs_header_nritems(leaf);
5453 if (nritems == 0)
5454 return 1;
5455 if (path->slots[0] == nritems)
5456 path->slots[0]--;
5457
5458 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5459 if (found_key.objectid < min_objectid)
5460 break;
5461 if (found_key.type == type)
5462 return 0;
5463 if (found_key.objectid == min_objectid &&
5464 found_key.type < type)
5465 break;
5466 }
5467 return 1;
5468}
5469
5470
5471
5472
5473
5474
5475
5476int btrfs_previous_extent_item(struct btrfs_root *root,
5477 struct btrfs_path *path, u64 min_objectid)
5478{
5479 struct btrfs_key found_key;
5480 struct extent_buffer *leaf;
5481 u32 nritems;
5482 int ret;
5483
5484 while (1) {
5485 if (path->slots[0] == 0) {
5486 ret = btrfs_prev_leaf(root, path);
5487 if (ret != 0)
5488 return ret;
5489 } else {
5490 path->slots[0]--;
5491 }
5492 leaf = path->nodes[0];
5493 nritems = btrfs_header_nritems(leaf);
5494 if (nritems == 0)
5495 return 1;
5496 if (path->slots[0] == nritems)
5497 path->slots[0]--;
5498
5499 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5500 if (found_key.objectid < min_objectid)
5501 break;
5502 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5503 found_key.type == BTRFS_METADATA_ITEM_KEY)
5504 return 0;
5505 if (found_key.objectid == min_objectid &&
5506 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5507 break;
5508 }
5509 return 1;
5510}
5511