// SPDX-License-Identifier: GPL-2.0

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
#include "tree-mod-log.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *ins_key, struct btrfs_path *path,
		      int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
		    int level, int slot);

static const struct btrfs_csums {
	u16		size;
	const char	name[10];
	const char	driver[12];
} btrfs_csums[] = {
	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
				     .driver = "blake2b-256" },
};

int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
	u16 t = btrfs_super_csum_type(s);

	/* csum type is validated at mount time */
	return btrfs_csums[t].size;
}

const char *btrfs_super_csum_name(u16 csum_type)
{
	/* csum type is validated at mount time */
	return btrfs_csums[csum_type].name;
}

/*
 * Return the csum driver name if one is defined, otherwise the csum name,
 * which is also a valid driver name.
 */
const char *btrfs_super_csum_driver(u16 csum_type)
{
	/* csum type is validated at mount time */
	return btrfs_csums[csum_type].driver[0] ?
		btrfs_csums[csum_type].driver :
		btrfs_csums[csum_type].name;
}

size_t __attribute_const__ btrfs_get_num_csums(void)
{
	return ARRAY_SIZE(btrfs_csums);
}

struct btrfs_path *btrfs_alloc_path(void)
{
	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
}

/* This also releases the path. */
void btrfs_free_path(struct btrfs_path *p)
{
	if (!p)
		return;
	btrfs_release_path(p);
	kmem_cache_free(btrfs_path_cachep, p);
}

/*
 * Path release drops references on the extent buffers in the path and drops
 * any locks held by this path.
 *
 * It is safe to call this on paths that hold no locks or extent buffers.
 */
97noinline void btrfs_release_path(struct btrfs_path *p)
98{
99 int i;
100
101 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
102 p->slots[i] = 0;
103 if (!p->nodes[i])
104 continue;
105 if (p->locks[i]) {
106 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
107 p->locks[i] = 0;
108 }
109 free_extent_buffer(p->nodes[i]);
110 p->nodes[i] = NULL;
111 }
112}

/*
 * Safely get a reference on the current root node of @root.  RCU is used so
 * that a concurrent root replacement (for example by COW) cannot free the
 * node out from under us before we have taken our reference.
 */
124struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
125{
126 struct extent_buffer *eb;
127
128 while (1) {
129 rcu_read_lock();
130 eb = rcu_dereference(root->node);

		/*
		 * RCU really hurts here, we could free up the root node because
		 * it was COWed but we may not get the new root node yet so do
		 * the inc_not_zero dance and if it doesn't work then
		 * synchronize_rcu and try again.
		 */
138 if (atomic_inc_not_zero(&eb->refs)) {
139 rcu_read_unlock();
140 break;
141 }
142 rcu_read_unlock();
143 synchronize_rcu();
144 }
145 return eb;
146}

/*
 * Add @root to the list of dirty roots for the current transaction, so that
 * its updated root item gets written out at transaction commit time.  Roots
 * that don't have BTRFS_ROOT_TRACK_DIRTY set are skipped.
 */
153static void add_root_to_dirty_list(struct btrfs_root *root)
154{
155 struct btrfs_fs_info *fs_info = root->fs_info;
156
157 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
158 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
159 return;
160
161 spin_lock(&fs_info->trans_lock);
162 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
		/* Want the extent tree to be the last on the list */
164 if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
165 list_move_tail(&root->dirty_list,
166 &fs_info->dirty_cowonly_roots);
167 else
168 list_move(&root->dirty_list,
169 &fs_info->dirty_cowonly_roots);
170 }
171 spin_unlock(&fs_info->trans_lock);
172}

/*
 * Used by snapshot creation to make a copy of a root for a tree with no data
 * (for example the reloc tree).  A new block is allocated, the contents of
 * @buf are copied into it and *cow_ret is set to the new buffer.
 *
 * Returns 0 on success or a negative errno on failure.
 */
179int btrfs_copy_root(struct btrfs_trans_handle *trans,
180 struct btrfs_root *root,
181 struct extent_buffer *buf,
182 struct extent_buffer **cow_ret, u64 new_root_objectid)
183{
184 struct btrfs_fs_info *fs_info = root->fs_info;
185 struct extent_buffer *cow;
186 int ret = 0;
187 int level;
188 struct btrfs_disk_key disk_key;
189
190 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
191 trans->transid != fs_info->running_transaction->transid);
192 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
193 trans->transid != root->last_trans);
194
195 level = btrfs_header_level(buf);
196 if (level == 0)
197 btrfs_item_key(buf, &disk_key, 0);
198 else
199 btrfs_node_key(buf, &disk_key, 0);
200
201 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
202 &disk_key, level, buf->start, 0,
203 BTRFS_NESTING_NEW_ROOT);
204 if (IS_ERR(cow))
205 return PTR_ERR(cow);
206
207 copy_extent_buffer_full(cow, buf);
208 btrfs_set_header_bytenr(cow, cow->start);
209 btrfs_set_header_generation(cow, trans->transid);
210 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
211 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
212 BTRFS_HEADER_FLAG_RELOC);
213 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
214 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
215 else
216 btrfs_set_header_owner(cow, new_root_objectid);
217
218 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
219
220 WARN_ON(btrfs_header_generation(buf) > trans->transid);
221 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
222 ret = btrfs_inc_ref(trans, root, cow, 1);
223 else
224 ret = btrfs_inc_ref(trans, root, cow, 0);
225 if (ret) {
226 btrfs_tree_unlock(cow);
227 free_extent_buffer(cow);
228 btrfs_abort_transaction(trans, ret);
229 return ret;
230 }
231
232 btrfs_mark_buffer_dirty(cow);
233 *cow_ret = cow;
234 return 0;
235}

/*
 * Check if the tree block can be shared by multiple trees.
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf)
{
	/*
	 * Tree blocks not in shareable trees and tree roots are never shared.
	 * If a block was allocated after the last snapshot and the block was
	 * not allocated by tree relocation, we know the block is not shared.
	 */
248 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
249 buf != root->node && buf != root->commit_root &&
250 (btrfs_header_generation(buf) <=
251 btrfs_root_last_snapshot(&root->root_item) ||
252 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
253 return 1;
254
255 return 0;
256}
257
258static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
259 struct btrfs_root *root,
260 struct extent_buffer *buf,
261 struct extent_buffer *cow,
262 int *last_ref)
263{
264 struct btrfs_fs_info *fs_info = root->fs_info;
265 u64 refs;
266 u64 owner;
267 u64 flags;
268 u64 new_flags = 0;
269 int ret;

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree blocks
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in the tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in the tree block.
	 * The reason for this is that some operations (such as drop tree)
	 * are only allowed for blocks that use full backrefs.
	 */
288 if (btrfs_block_can_be_shared(root, buf)) {
289 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
290 btrfs_header_level(buf), 1,
291 &refs, &flags);
292 if (ret)
293 return ret;
294 if (refs == 0) {
295 ret = -EROFS;
296 btrfs_handle_fs_error(fs_info, ret, NULL);
297 return ret;
298 }
299 } else {
300 refs = 1;
301 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
302 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
303 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
304 else
305 flags = 0;
306 }
307
308 owner = btrfs_header_owner(buf);
309 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
310 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
311
312 if (refs > 1) {
313 if ((owner == root->root_key.objectid ||
314 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
315 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
316 ret = btrfs_inc_ref(trans, root, buf, 1);
317 if (ret)
318 return ret;
319
320 if (root->root_key.objectid ==
321 BTRFS_TREE_RELOC_OBJECTID) {
322 ret = btrfs_dec_ref(trans, root, buf, 0);
323 if (ret)
324 return ret;
325 ret = btrfs_inc_ref(trans, root, cow, 1);
326 if (ret)
327 return ret;
328 }
329 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
330 } else {
331
332 if (root->root_key.objectid ==
333 BTRFS_TREE_RELOC_OBJECTID)
334 ret = btrfs_inc_ref(trans, root, cow, 1);
335 else
336 ret = btrfs_inc_ref(trans, root, cow, 0);
337 if (ret)
338 return ret;
339 }
340 if (new_flags != 0) {
341 int level = btrfs_header_level(buf);
342
343 ret = btrfs_set_disk_extent_flags(trans, buf,
344 new_flags, level, 0);
345 if (ret)
346 return ret;
347 }
348 } else {
349 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
350 if (root->root_key.objectid ==
351 BTRFS_TREE_RELOC_OBJECTID)
352 ret = btrfs_inc_ref(trans, root, cow, 1);
353 else
354 ret = btrfs_inc_ref(trans, root, cow, 0);
355 if (ret)
356 return ret;
357 ret = btrfs_dec_ref(trans, root, buf, 1);
358 if (ret)
359 return ret;
360 }
361 btrfs_clean_tree_block(buf);
362 *last_ref = 1;
363 }
364 return 0;
365}
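
/*
 * Do the dirty work of COWing a single block.  The parent block (if supplied)
 * is updated to point at the new copy, the new buffer is marked dirty and
 * returned locked, and the old buffer is freed or queued for freeing.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you are going to insert into a block that is
 * freeing up this much space
 */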
379static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
380 struct btrfs_root *root,
381 struct extent_buffer *buf,
382 struct extent_buffer *parent, int parent_slot,
383 struct extent_buffer **cow_ret,
384 u64 search_start, u64 empty_size,
385 enum btrfs_lock_nesting nest)
386{
387 struct btrfs_fs_info *fs_info = root->fs_info;
388 struct btrfs_disk_key disk_key;
389 struct extent_buffer *cow;
390 int level, ret;
391 int last_ref = 0;
392 int unlock_orig = 0;
393 u64 parent_start = 0;
394
395 if (*cow_ret == buf)
396 unlock_orig = 1;
397
398 btrfs_assert_tree_locked(buf);
399
400 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
401 trans->transid != fs_info->running_transaction->transid);
402 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
403 trans->transid != root->last_trans);
404
405 level = btrfs_header_level(buf);
406
407 if (level == 0)
408 btrfs_item_key(buf, &disk_key, 0);
409 else
410 btrfs_node_key(buf, &disk_key, 0);
411
412 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
413 parent_start = parent->start;
414
415 cow = btrfs_alloc_tree_block(trans, root, parent_start,
416 root->root_key.objectid, &disk_key, level,
417 search_start, empty_size, nest);
418 if (IS_ERR(cow))
419 return PTR_ERR(cow);
420
421
422
423 copy_extent_buffer_full(cow, buf);
424 btrfs_set_header_bytenr(cow, cow->start);
425 btrfs_set_header_generation(cow, trans->transid);
426 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
427 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
428 BTRFS_HEADER_FLAG_RELOC);
429 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
430 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
431 else
432 btrfs_set_header_owner(cow, root->root_key.objectid);
433
434 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
435
436 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
437 if (ret) {
438 btrfs_tree_unlock(cow);
439 free_extent_buffer(cow);
440 btrfs_abort_transaction(trans, ret);
441 return ret;
442 }
443
444 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
445 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
446 if (ret) {
447 btrfs_tree_unlock(cow);
448 free_extent_buffer(cow);
449 btrfs_abort_transaction(trans, ret);
450 return ret;
451 }
452 }
453
454 if (buf == root->node) {
455 WARN_ON(parent && parent != buf);
456 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
457 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
458 parent_start = buf->start;
459
460 atomic_inc(&cow->refs);
461 ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
462 BUG_ON(ret < 0);
463 rcu_assign_pointer(root->node, cow);
464
465 btrfs_free_tree_block(trans, root, buf, parent_start,
466 last_ref);
467 free_extent_buffer(buf);
468 add_root_to_dirty_list(root);
469 } else {
470 WARN_ON(trans->transid != btrfs_header_generation(parent));
471 btrfs_tree_mod_log_insert_key(parent, parent_slot,
472 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
473 btrfs_set_node_blockptr(parent, parent_slot,
474 cow->start);
475 btrfs_set_node_ptr_generation(parent, parent_slot,
476 trans->transid);
477 btrfs_mark_buffer_dirty(parent);
478 if (last_ref) {
479 ret = btrfs_tree_mod_log_free_eb(buf);
480 if (ret) {
481 btrfs_tree_unlock(cow);
482 free_extent_buffer(cow);
483 btrfs_abort_transaction(trans, ret);
484 return ret;
485 }
486 }
487 btrfs_free_tree_block(trans, root, buf, parent_start,
488 last_ref);
489 }
490 if (unlock_orig)
491 btrfs_tree_unlock(buf);
492 free_extent_buffer_stale(buf);
493 btrfs_mark_buffer_dirty(cow);
494 *cow_ret = cow;
495 return 0;
496}
497
498static inline int should_cow_block(struct btrfs_trans_handle *trans,
499 struct btrfs_root *root,
500 struct extent_buffer *buf)
501{
502 if (btrfs_is_testing(root->fs_info))
503 return 0;

	/* Ensure we can see the FORCE_COW bit */
	smp_mb__before_atomic();

	/*
	 * We do not need to cow a block if
	 * 1) this block is not created or changed in this transaction;
	 * 2) this block does not belong to the TREE_RELOC tree;
	 * 3) the root is not forced COW.
	 *
	 * What is forced COW:
	 *    when we create a snapshot during committing the transaction,
	 *    after we've finished copying the src root, we must COW the shared
	 *    block to ensure metadata consistency.
	 */
519 if (btrfs_header_generation(buf) == trans->transid &&
520 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
521 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
522 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
523 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
524 return 0;
525 return 1;
526}
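
/*
 * COW a single block, see __btrfs_cow_block() for the real work.
 * This version of it has extra checks so that a block isn't COWed more than
 * once per transaction, as long as it hasn't been written yet.
 */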
533noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
534 struct btrfs_root *root, struct extent_buffer *buf,
535 struct extent_buffer *parent, int parent_slot,
536 struct extent_buffer **cow_ret,
537 enum btrfs_lock_nesting nest)
538{
539 struct btrfs_fs_info *fs_info = root->fs_info;
540 u64 search_start;
541 int ret;
542
543 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
544 btrfs_err(fs_info,
545 "COW'ing blocks on a fs root that's being dropped");
546
547 if (trans->transaction != fs_info->running_transaction)
548 WARN(1, KERN_CRIT "trans %llu running %llu\n",
549 trans->transid,
550 fs_info->running_transaction->transid);
551
552 if (trans->transid != fs_info->generation)
553 WARN(1, KERN_CRIT "trans %llu running %llu\n",
554 trans->transid, fs_info->generation);
555
556 if (!should_cow_block(trans, root, buf)) {
557 *cow_ret = buf;
558 return 0;
559 }
560
561 search_start = buf->start & ~((u64)SZ_1G - 1);

	/*
	 * Before COWing this block for later modification, check if it's
	 * the subtree root and do the delayed subtree trace if needed.
	 *
	 * Any error is handled internally, so we don't care about it here.
	 */
569 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
570 ret = __btrfs_cow_block(trans, root, buf, parent,
571 parent_slot, cow_ret, search_start, 0, nest);
572
573 trace_btrfs_cow_block(root, buf, *cow_ret);
574
575 return ret;
576}
577ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);

/*
 * Helper function for defrag to decide if two blocks pointed to by a
 * node are actually close by on disk.
 */
583static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
584{
585 if (blocknr < other && other - (blocknr + blocksize) < 32768)
586 return 1;
587 if (blocknr > other && blocknr - (other + blocksize) < 32768)
588 return 1;
589 return 0;
590}
591
592#ifdef __LITTLE_ENDIAN

/*
 * Compare two keys in a memcmp fashion.
 *
 * On little-endian machines the on-disk key layout matches the CPU key
 * layout, so we can compare without converting the disk key.
 */
598static int comp_keys(const struct btrfs_disk_key *disk_key,
599 const struct btrfs_key *k2)
600{
601 const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
602
603 return btrfs_comp_cpu_keys(k1, k2);
604}
605
606#else

/*
 * Compare two keys in a memcmp fashion, converting the disk key to CPU byte
 * order first.
 */
611static int comp_keys(const struct btrfs_disk_key *disk,
612 const struct btrfs_key *k2)
613{
614 struct btrfs_key k1;
615
616 btrfs_disk_key_to_cpu(&k1, disk);
617
618 return btrfs_comp_cpu_keys(&k1, k2);
619}
620#endif

/*
 * Same as comp_keys() but taking two CPU keys.
 */
625int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
626{
627 if (k1->objectid > k2->objectid)
628 return 1;
629 if (k1->objectid < k2->objectid)
630 return -1;
631 if (k1->type > k2->type)
632 return 1;
633 if (k1->type < k2->type)
634 return -1;
635 if (k1->offset > k2->offset)
636 return 1;
637 if (k1->offset < k2->offset)
638 return -1;
639 return 0;
640}

/*
 * This is used by the defrag code to go through all the leaves pointed to by
 * a node and reallocate them so that disk order is close to key order.
 */
647int btrfs_realloc_node(struct btrfs_trans_handle *trans,
648 struct btrfs_root *root, struct extent_buffer *parent,
649 int start_slot, u64 *last_ret,
650 struct btrfs_key *progress)
651{
652 struct btrfs_fs_info *fs_info = root->fs_info;
653 struct extent_buffer *cur;
654 u64 blocknr;
655 u64 search_start = *last_ret;
656 u64 last_block = 0;
657 u64 other;
658 u32 parent_nritems;
659 int end_slot;
660 int i;
661 int err = 0;
662 u32 blocksize;
663 int progress_passed = 0;
664 struct btrfs_disk_key disk_key;
665
666 WARN_ON(trans->transaction != fs_info->running_transaction);
667 WARN_ON(trans->transid != fs_info->generation);
668
669 parent_nritems = btrfs_header_nritems(parent);
670 blocksize = fs_info->nodesize;
671 end_slot = parent_nritems - 1;
672
673 if (parent_nritems <= 1)
674 return 0;
675
676 for (i = start_slot; i <= end_slot; i++) {
677 int close = 1;
678
679 btrfs_node_key(parent, &disk_key, i);
680 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
681 continue;
682
683 progress_passed = 1;
684 blocknr = btrfs_node_blockptr(parent, i);
685 if (last_block == 0)
686 last_block = blocknr;
687
688 if (i > 0) {
689 other = btrfs_node_blockptr(parent, i - 1);
690 close = close_blocks(blocknr, other, blocksize);
691 }
692 if (!close && i < end_slot) {
693 other = btrfs_node_blockptr(parent, i + 1);
694 close = close_blocks(blocknr, other, blocksize);
695 }
696 if (close) {
697 last_block = blocknr;
698 continue;
699 }
700
701 cur = btrfs_read_node_slot(parent, i);
702 if (IS_ERR(cur))
703 return PTR_ERR(cur);
704 if (search_start == 0)
705 search_start = last_block;
706
707 btrfs_tree_lock(cur);
708 err = __btrfs_cow_block(trans, root, cur, parent, i,
709 &cur, search_start,
710 min(16 * blocksize,
711 (end_slot - i) * blocksize),
712 BTRFS_NESTING_COW);
713 if (err) {
714 btrfs_tree_unlock(cur);
715 free_extent_buffer(cur);
716 break;
717 }
718 search_start = cur->start;
719 last_block = cur->start;
720 *last_ret = search_start;
721 btrfs_tree_unlock(cur);
722 free_extent_buffer(cur);
723 }
724 return err;
725}
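
/*
 * Search for @key in the extent buffer @eb.  The items start at offset @p
 * and they are @item_size apart.
 *
 * Returns 0 if the key was found and *slot points at it, 1 if it was not
 * found and *slot points at the slot where it would be inserted, or a
 * negative errno if the buffer is invalid.
 */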
738static noinline int generic_bin_search(struct extent_buffer *eb,
739 unsigned long p, int item_size,
740 const struct btrfs_key *key, int *slot)
741{
742 int low = 0;
743 int high = btrfs_header_nritems(eb);
744 int ret;
745 const int key_size = sizeof(struct btrfs_disk_key);
746
747 if (low > high) {
748 btrfs_err(eb->fs_info,
749 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
750 __func__, low, high, eb->start,
751 btrfs_header_owner(eb), btrfs_header_level(eb));
752 return -EINVAL;
753 }
754
755 while (low < high) {
756 unsigned long oip;
757 unsigned long offset;
758 struct btrfs_disk_key *tmp;
759 struct btrfs_disk_key unaligned;
760 int mid;
761
762 mid = (low + high) / 2;
763 offset = p + mid * item_size;
764 oip = offset_in_page(offset);
765
766 if (oip + key_size <= PAGE_SIZE) {
767 const unsigned long idx = get_eb_page_index(offset);
768 char *kaddr = page_address(eb->pages[idx]);
769
770 oip = get_eb_offset_in_page(eb, offset);
771 tmp = (struct btrfs_disk_key *)(kaddr + oip);
772 } else {
773 read_extent_buffer(eb, &unaligned, offset, key_size);
774 tmp = &unaligned;
775 }
776
777 ret = comp_keys(tmp, key);
778
779 if (ret < 0)
780 low = mid + 1;
781 else if (ret > 0)
782 high = mid;
783 else {
784 *slot = mid;
785 return 0;
786 }
787 }
788 *slot = low;
789 return 1;
790}

/*
 * Simple binary search on an extent buffer.  Works for both leaves and nodes.
 */
796int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
797 int *slot)
798{
799 if (btrfs_header_level(eb) == 0)
800 return generic_bin_search(eb,
801 offsetof(struct btrfs_leaf, items),
802 sizeof(struct btrfs_item), key, slot);
803 else
804 return generic_bin_search(eb,
805 offsetof(struct btrfs_node, ptrs),
806 sizeof(struct btrfs_key_ptr), key, slot);
807}
808
809static void root_add_used(struct btrfs_root *root, u32 size)
810{
811 spin_lock(&root->accounting_lock);
812 btrfs_set_root_used(&root->root_item,
813 btrfs_root_used(&root->root_item) + size);
814 spin_unlock(&root->accounting_lock);
815}
816
817static void root_sub_used(struct btrfs_root *root, u32 size)
818{
819 spin_lock(&root->accounting_lock);
820 btrfs_set_root_used(&root->root_item,
821 btrfs_root_used(&root->root_item) - size);
822 spin_unlock(&root->accounting_lock);
823}

/*
 * Read the child tree block pointed to by @slot of node @parent, returning
 * the extent buffer or an ERR_PTR on failure.
 */
828struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
829 int slot)
830{
831 int level = btrfs_header_level(parent);
832 struct extent_buffer *eb;
833 struct btrfs_key first_key;
834
835 if (slot < 0 || slot >= btrfs_header_nritems(parent))
836 return ERR_PTR(-ENOENT);
837
838 BUG_ON(level == 0);
839
840 btrfs_node_key_to_cpu(parent, &first_key, slot);
841 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
842 btrfs_header_owner(parent),
843 btrfs_node_ptr_generation(parent, slot),
844 level - 1, &first_key);
845 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
846 free_extent_buffer(eb);
847 eb = ERR_PTR(-EIO);
848 }
849
850 return eb;
851}
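
/*
 * Node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 */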
858static noinline int balance_level(struct btrfs_trans_handle *trans,
859 struct btrfs_root *root,
860 struct btrfs_path *path, int level)
861{
862 struct btrfs_fs_info *fs_info = root->fs_info;
863 struct extent_buffer *right = NULL;
864 struct extent_buffer *mid;
865 struct extent_buffer *left = NULL;
866 struct extent_buffer *parent = NULL;
867 int ret = 0;
868 int wret;
869 int pslot;
870 int orig_slot = path->slots[level];
871 u64 orig_ptr;
872
873 ASSERT(level > 0);
874
875 mid = path->nodes[level];
876
877 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
878 WARN_ON(btrfs_header_generation(mid) != trans->transid);
879
880 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
881
882 if (level < BTRFS_MAX_LEVEL - 1) {
883 parent = path->nodes[level + 1];
884 pslot = path->slots[level + 1];
885 }

	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
891 if (!parent) {
892 struct extent_buffer *child;
893
894 if (btrfs_header_nritems(mid) != 1)
895 return 0;

		/* promote the child to a root */
898 child = btrfs_read_node_slot(mid, 0);
899 if (IS_ERR(child)) {
900 ret = PTR_ERR(child);
901 btrfs_handle_fs_error(fs_info, ret, NULL);
902 goto enospc;
903 }
904
905 btrfs_tree_lock(child);
906 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
907 BTRFS_NESTING_COW);
908 if (ret) {
909 btrfs_tree_unlock(child);
910 free_extent_buffer(child);
911 goto enospc;
912 }
913
914 ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
915 BUG_ON(ret < 0);
916 rcu_assign_pointer(root->node, child);
917
918 add_root_to_dirty_list(root);
919 btrfs_tree_unlock(child);
920
921 path->locks[level] = 0;
922 path->nodes[level] = NULL;
923 btrfs_clean_tree_block(mid);
924 btrfs_tree_unlock(mid);
925
926 free_extent_buffer(mid);
927
928 root_sub_used(root, mid->len);
929 btrfs_free_tree_block(trans, root, mid, 0, 1);
930
931 free_extent_buffer_stale(mid);
932 return 0;
933 }
934 if (btrfs_header_nritems(mid) >
935 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
936 return 0;
937
938 left = btrfs_read_node_slot(parent, pslot - 1);
939 if (IS_ERR(left))
940 left = NULL;
941
942 if (left) {
943 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
944 wret = btrfs_cow_block(trans, root, left,
945 parent, pslot - 1, &left,
946 BTRFS_NESTING_LEFT_COW);
947 if (wret) {
948 ret = wret;
949 goto enospc;
950 }
951 }
952
953 right = btrfs_read_node_slot(parent, pslot + 1);
954 if (IS_ERR(right))
955 right = NULL;
956
957 if (right) {
958 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
959 wret = btrfs_cow_block(trans, root, right,
960 parent, pslot + 1, &right,
961 BTRFS_NESTING_RIGHT_COW);
962 if (wret) {
963 ret = wret;
964 goto enospc;
965 }
966 }

	/* first, try to make some room in the middle buffer */
969 if (left) {
970 orig_slot += btrfs_header_nritems(left);
971 wret = push_node_left(trans, left, mid, 1);
972 if (wret < 0)
973 ret = wret;
974 }

	/*
	 * then try to empty the right most buffer into the middle
	 */
979 if (right) {
980 wret = push_node_left(trans, mid, right, 1);
981 if (wret < 0 && wret != -ENOSPC)
982 ret = wret;
983 if (btrfs_header_nritems(right) == 0) {
984 btrfs_clean_tree_block(right);
985 btrfs_tree_unlock(right);
986 del_ptr(root, path, level + 1, pslot + 1);
987 root_sub_used(root, right->len);
988 btrfs_free_tree_block(trans, root, right, 0, 1);
989 free_extent_buffer_stale(right);
990 right = NULL;
991 } else {
992 struct btrfs_disk_key right_key;
993 btrfs_node_key(right, &right_key, 0);
994 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
995 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
996 BUG_ON(ret < 0);
997 btrfs_set_node_key(parent, &right_key, pslot + 1);
998 btrfs_mark_buffer_dirty(parent);
999 }
1000 }
1001 if (btrfs_header_nritems(mid) == 1) {
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node
		 * leaving it empty.  So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right.
		 */
1011 if (!left) {
1012 ret = -EROFS;
1013 btrfs_handle_fs_error(fs_info, ret, NULL);
1014 goto enospc;
1015 }
1016 wret = balance_node_right(trans, mid, left);
1017 if (wret < 0) {
1018 ret = wret;
1019 goto enospc;
1020 }
1021 if (wret == 1) {
1022 wret = push_node_left(trans, left, mid, 1);
1023 if (wret < 0)
1024 ret = wret;
1025 }
1026 BUG_ON(wret == 1);
1027 }
1028 if (btrfs_header_nritems(mid) == 0) {
1029 btrfs_clean_tree_block(mid);
1030 btrfs_tree_unlock(mid);
1031 del_ptr(root, path, level + 1, pslot);
1032 root_sub_used(root, mid->len);
1033 btrfs_free_tree_block(trans, root, mid, 0, 1);
1034 free_extent_buffer_stale(mid);
1035 mid = NULL;
1036 } else {
		/* update the parent key to reflect our changes */
1038 struct btrfs_disk_key mid_key;
1039 btrfs_node_key(mid, &mid_key, 0);
1040 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1041 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1042 BUG_ON(ret < 0);
1043 btrfs_set_node_key(parent, &mid_key, pslot);
1044 btrfs_mark_buffer_dirty(parent);
1045 }

	/* update the path */
1048 if (left) {
1049 if (btrfs_header_nritems(left) > orig_slot) {
1050 atomic_inc(&left->refs);
1051
1052 path->nodes[level] = left;
1053 path->slots[level + 1] -= 1;
1054 path->slots[level] = orig_slot;
1055 if (mid) {
1056 btrfs_tree_unlock(mid);
1057 free_extent_buffer(mid);
1058 }
1059 } else {
1060 orig_slot -= btrfs_header_nritems(left);
1061 path->slots[level] = orig_slot;
1062 }
1063 }
1064
1065 if (orig_ptr !=
1066 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1067 BUG();
1068enospc:
1069 if (right) {
1070 btrfs_tree_unlock(right);
1071 free_extent_buffer(right);
1072 }
1073 if (left) {
1074 if (path->nodes[level] != left)
1075 btrfs_tree_unlock(left);
1076 free_extent_buffer(left);
1077 }
1078 return ret;
1079}
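
/*
 * Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 */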
1085static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1086 struct btrfs_root *root,
1087 struct btrfs_path *path, int level)
1088{
1089 struct btrfs_fs_info *fs_info = root->fs_info;
1090 struct extent_buffer *right = NULL;
1091 struct extent_buffer *mid;
1092 struct extent_buffer *left = NULL;
1093 struct extent_buffer *parent = NULL;
1094 int ret = 0;
1095 int wret;
1096 int pslot;
1097 int orig_slot = path->slots[level];
1098
1099 if (level == 0)
1100 return 1;
1101
1102 mid = path->nodes[level];
1103 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1104
1105 if (level < BTRFS_MAX_LEVEL - 1) {
1106 parent = path->nodes[level + 1];
1107 pslot = path->slots[level + 1];
1108 }
1109
1110 if (!parent)
1111 return 1;
1112
1113 left = btrfs_read_node_slot(parent, pslot - 1);
1114 if (IS_ERR(left))
1115 left = NULL;

	/* first, try to make some room in the middle buffer */
1118 if (left) {
1119 u32 left_nr;
1120
1121 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
1122
1123 left_nr = btrfs_header_nritems(left);
1124 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
1125 wret = 1;
1126 } else {
1127 ret = btrfs_cow_block(trans, root, left, parent,
1128 pslot - 1, &left,
1129 BTRFS_NESTING_LEFT_COW);
1130 if (ret)
1131 wret = 1;
1132 else {
1133 wret = push_node_left(trans, left, mid, 0);
1134 }
1135 }
1136 if (wret < 0)
1137 ret = wret;
1138 if (wret == 0) {
1139 struct btrfs_disk_key disk_key;
1140 orig_slot += left_nr;
1141 btrfs_node_key(mid, &disk_key, 0);
1142 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1143 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1144 BUG_ON(ret < 0);
1145 btrfs_set_node_key(parent, &disk_key, pslot);
1146 btrfs_mark_buffer_dirty(parent);
1147 if (btrfs_header_nritems(left) > orig_slot) {
1148 path->nodes[level] = left;
1149 path->slots[level + 1] -= 1;
1150 path->slots[level] = orig_slot;
1151 btrfs_tree_unlock(mid);
1152 free_extent_buffer(mid);
1153 } else {
1154 orig_slot -=
1155 btrfs_header_nritems(left);
1156 path->slots[level] = orig_slot;
1157 btrfs_tree_unlock(left);
1158 free_extent_buffer(left);
1159 }
1160 return 0;
1161 }
1162 btrfs_tree_unlock(left);
1163 free_extent_buffer(left);
1164 }
1165 right = btrfs_read_node_slot(parent, pslot + 1);
1166 if (IS_ERR(right))
1167 right = NULL;

	/*
	 * then try to empty the right most buffer into the middle
	 */
1172 if (right) {
1173 u32 right_nr;
1174
1175 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
1176
1177 right_nr = btrfs_header_nritems(right);
1178 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
1179 wret = 1;
1180 } else {
1181 ret = btrfs_cow_block(trans, root, right,
1182 parent, pslot + 1,
1183 &right, BTRFS_NESTING_RIGHT_COW);
1184 if (ret)
1185 wret = 1;
1186 else {
1187 wret = balance_node_right(trans, right, mid);
1188 }
1189 }
1190 if (wret < 0)
1191 ret = wret;
1192 if (wret == 0) {
1193 struct btrfs_disk_key disk_key;
1194
1195 btrfs_node_key(right, &disk_key, 0);
1196 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1197 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1198 BUG_ON(ret < 0);
1199 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1200 btrfs_mark_buffer_dirty(parent);
1201
1202 if (btrfs_header_nritems(mid) <= orig_slot) {
1203 path->nodes[level] = right;
1204 path->slots[level + 1] += 1;
1205 path->slots[level] = orig_slot -
1206 btrfs_header_nritems(mid);
1207 btrfs_tree_unlock(mid);
1208 free_extent_buffer(mid);
1209 } else {
1210 btrfs_tree_unlock(right);
1211 free_extent_buffer(right);
1212 }
1213 return 0;
1214 }
1215 btrfs_tree_unlock(right);
1216 free_extent_buffer(right);
1217 }
1218 return 1;
1219}
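
/*
 * Readahead for a whole node's worth of child blocks, triggering reads for
 * blocks that are close (on disk) to the block at @slot.
 */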
1225static void reada_for_search(struct btrfs_fs_info *fs_info,
1226 struct btrfs_path *path,
1227 int level, int slot, u64 objectid)
1228{
1229 struct extent_buffer *node;
1230 struct btrfs_disk_key disk_key;
1231 u32 nritems;
1232 u64 search;
1233 u64 target;
1234 u64 nread = 0;
1235 u64 nread_max;
1236 u32 nr;
1237 u32 blocksize;
1238 u32 nscan = 0;
1239
1240 if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
1241 return;
1242
1243 if (!path->nodes[level])
1244 return;
1245
1246 node = path->nodes[level];
1247
1248
1249
1250
1251
1252
1253 if (path->reada == READA_FORWARD_ALWAYS) {
1254 if (level > 1)
1255 nread_max = node->fs_info->nodesize;
1256 else
1257 nread_max = SZ_128K;
1258 } else {
1259 nread_max = SZ_64K;
1260 }
1261
1262 search = btrfs_node_blockptr(node, slot);
1263 blocksize = fs_info->nodesize;
1264 if (path->reada != READA_FORWARD_ALWAYS) {
1265 struct extent_buffer *eb;
1266
1267 eb = find_extent_buffer(fs_info, search);
1268 if (eb) {
1269 free_extent_buffer(eb);
1270 return;
1271 }
1272 }
1273
1274 target = search;
1275
1276 nritems = btrfs_header_nritems(node);
1277 nr = slot;
1278
1279 while (1) {
1280 if (path->reada == READA_BACK) {
1281 if (nr == 0)
1282 break;
1283 nr--;
1284 } else if (path->reada == READA_FORWARD ||
1285 path->reada == READA_FORWARD_ALWAYS) {
1286 nr++;
1287 if (nr >= nritems)
1288 break;
1289 }
1290 if (path->reada == READA_BACK && objectid) {
1291 btrfs_node_key(node, &disk_key, nr);
1292 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1293 break;
1294 }
1295 search = btrfs_node_blockptr(node, nr);
1296 if (path->reada == READA_FORWARD_ALWAYS ||
1297 (search <= target && target - search <= 65536) ||
1298 (search > target && search - target <= 65536)) {
1299 btrfs_readahead_node_child(node, nr);
1300 nread += blocksize;
1301 }
1302 nscan++;
1303 if (nread > nread_max || nscan > 32)
1304 break;
1305 }
1306}
1307
1308static noinline void reada_for_balance(struct btrfs_path *path, int level)
1309{
1310 struct extent_buffer *parent;
1311 int slot;
1312 int nritems;
1313
1314 parent = path->nodes[level + 1];
1315 if (!parent)
1316 return;
1317
1318 nritems = btrfs_header_nritems(parent);
1319 slot = path->slots[level + 1];
1320
1321 if (slot > 0)
1322 btrfs_readahead_node_child(parent, slot - 1);
1323 if (slot + 1 < nritems)
1324 btrfs_readahead_node_child(parent, slot + 1);
1325}
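
/*
 * When we walk down the tree, it is usually safe to unlock the higher layers
 * of the tree.  The exceptions are when our path goes through slot 0, because
 * operations on the tree might require changing key pointers higher up in the
 * tree.
 *
 * Callers might also have set path->keep_locks, which tells this code to keep
 * the lock if the path points to the last slot in the block.  This is part of
 * walking through the tree, and selecting the next slot in the higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to unlock, so
 * if lowest_unlock is 1, level 0 won't be unlocked.
 */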
1341static noinline void unlock_up(struct btrfs_path *path, int level,
1342 int lowest_unlock, int min_write_lock_level,
1343 int *write_lock_level)
1344{
1345 int i;
1346 int skip_level = level;
1347 int no_skips = 0;
1348 struct extent_buffer *t;
1349
1350 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1351 if (!path->nodes[i])
1352 break;
1353 if (!path->locks[i])
1354 break;
1355 if (!no_skips && path->slots[i] == 0) {
1356 skip_level = i + 1;
1357 continue;
1358 }
1359 if (!no_skips && path->keep_locks) {
1360 u32 nritems;
1361 t = path->nodes[i];
1362 nritems = btrfs_header_nritems(t);
1363 if (nritems < 1 || path->slots[i] >= nritems - 1) {
1364 skip_level = i + 1;
1365 continue;
1366 }
1367 }
1368 if (skip_level < i && i >= lowest_unlock)
1369 no_skips = 1;
1370
1371 t = path->nodes[i];
1372 if (i >= lowest_unlock && i > skip_level) {
1373 btrfs_tree_unlock_rw(t, path->locks[i]);
1374 path->locks[i] = 0;
1375 if (write_lock_level &&
1376 i > min_write_lock_level &&
1377 i <= *write_lock_level) {
1378 *write_lock_level = i - 1;
1379 }
1380 }
1381 }
1382}
1383
1392static int
1393read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
1394 struct extent_buffer **eb_ret, int level, int slot,
1395 const struct btrfs_key *key)
1396{
1397 struct btrfs_fs_info *fs_info = root->fs_info;
1398 u64 blocknr;
1399 u64 gen;
1400 struct extent_buffer *tmp;
1401 struct btrfs_key first_key;
1402 int ret;
1403 int parent_level;
1404
1405 blocknr = btrfs_node_blockptr(*eb_ret, slot);
1406 gen = btrfs_node_ptr_generation(*eb_ret, slot);
1407 parent_level = btrfs_header_level(*eb_ret);
1408 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
1409
1410 tmp = find_extent_buffer(fs_info, blocknr);
1411 if (tmp) {
1412 if (p->reada == READA_FORWARD_ALWAYS)
1413 reada_for_search(fs_info, p, level, slot, key->objectid);
1414
1415
1416 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
1417
1418
1419
1420
1421
1422 if (btrfs_verify_level_key(tmp,
1423 parent_level - 1, &first_key, gen)) {
1424 free_extent_buffer(tmp);
1425 return -EUCLEAN;
1426 }
1427 *eb_ret = tmp;
1428 return 0;
1429 }
1430
1431
1432 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
1433 if (!ret) {
1434 *eb_ret = tmp;
1435 return 0;
1436 }
1437 free_extent_buffer(tmp);
1438 btrfs_release_path(p);
1439 return -EIO;
1440 }
1441
1442
1443
1444
1445
1446
1447
1448
1449 btrfs_unlock_up_safe(p, level + 1);
1450
1451 if (p->reada != READA_NONE)
1452 reada_for_search(fs_info, p, level, slot, key->objectid);
1453
1454 ret = -EAGAIN;
1455 tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
1456 gen, parent_level - 1, &first_key);
1457 if (!IS_ERR(tmp)) {
1458
1459
1460
1461
1462
1463
1464 if (!extent_buffer_uptodate(tmp))
1465 ret = -EIO;
1466 free_extent_buffer(tmp);
1467 } else {
1468 ret = PTR_ERR(tmp);
1469 }
1470
1471 btrfs_release_path(p);
1472 return ret;
1473}
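
/*
 * Helper for btrfs_search_slot(): split or balance the node at @level as
 * required by @ins_len (split when inserting into a nearly full node,
 * balance when deleting from a nearly empty one).
 *
 * Returns 0 if the node is ready to use, -EAGAIN if the path was released
 * and the search must be restarted, or another negative errno on error.
 */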
1484static int
1485setup_nodes_for_search(struct btrfs_trans_handle *trans,
1486 struct btrfs_root *root, struct btrfs_path *p,
1487 struct extent_buffer *b, int level, int ins_len,
1488 int *write_lock_level)
1489{
1490 struct btrfs_fs_info *fs_info = root->fs_info;
1491 int ret = 0;
1492
1493 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
1494 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
1495
1496 if (*write_lock_level < level + 1) {
1497 *write_lock_level = level + 1;
1498 btrfs_release_path(p);
1499 return -EAGAIN;
1500 }
1501
1502 reada_for_balance(p, level);
1503 ret = split_node(trans, root, p, level);
1504
1505 b = p->nodes[level];
1506 } else if (ins_len < 0 && btrfs_header_nritems(b) <
1507 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
1508
1509 if (*write_lock_level < level + 1) {
1510 *write_lock_level = level + 1;
1511 btrfs_release_path(p);
1512 return -EAGAIN;
1513 }
1514
1515 reada_for_balance(p, level);
1516 ret = balance_level(trans, root, p, level);
1517 if (ret)
1518 return ret;
1519
1520 b = p->nodes[level];
1521 if (!b) {
1522 btrfs_release_path(p);
1523 return -EAGAIN;
1524 }
1525 BUG_ON(btrfs_header_nritems(b) == 1);
1526 }
1527 return ret;
1528}
1529
1530int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
1531 u64 iobjectid, u64 ioff, u8 key_type,
1532 struct btrfs_key *found_key)
1533{
1534 int ret;
1535 struct btrfs_key key;
1536 struct extent_buffer *eb;
1537
1538 ASSERT(path);
1539 ASSERT(found_key);
1540
1541 key.type = key_type;
1542 key.objectid = iobjectid;
1543 key.offset = ioff;
1544
1545 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
1546 if (ret < 0)
1547 return ret;
1548
1549 eb = path->nodes[0];
1550 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1551 ret = btrfs_next_leaf(fs_root, path);
1552 if (ret)
1553 return ret;
1554 eb = path->nodes[0];
1555 }
1556
1557 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1558 if (found_key->type != key.type ||
1559 found_key->objectid != key.objectid)
1560 return 1;
1561
1562 return 0;
1563}
1564
1565static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
1566 struct btrfs_path *p,
1567 int write_lock_level)
1568{
1569 struct btrfs_fs_info *fs_info = root->fs_info;
1570 struct extent_buffer *b;
1571 int root_lock;
1572 int level = 0;

	/* We try very hard to do read locks on the root */
1575 root_lock = BTRFS_READ_LOCK;
1576
1577 if (p->search_commit_root) {
		/*
		 * The commit roots are read only so we always do read locks,
		 * and we always must hold the commit_root_sem when doing
		 * searches on them, the only exception is send where we don't
		 * want to block transaction commits for a long time, so
		 * we need to clone the commit root in order to avoid races
		 * with transaction commits that create a snapshot of one of
		 * the roots used by a send operation.
		 */
1587 if (p->need_commit_sem) {
1588 down_read(&fs_info->commit_root_sem);
1589 b = btrfs_clone_extent_buffer(root->commit_root);
1590 up_read(&fs_info->commit_root_sem);
1591 if (!b)
1592 return ERR_PTR(-ENOMEM);
1593
1594 } else {
1595 b = root->commit_root;
1596 atomic_inc(&b->refs);
1597 }
1598 level = btrfs_header_level(b);
1599
1600
1601
1602
1603 ASSERT(p->skip_locking == 1);
1604
1605 goto out;
1606 }
1607
1608 if (p->skip_locking) {
1609 b = btrfs_root_node(root);
1610 level = btrfs_header_level(b);
1611 goto out;
1612 }
1613
1614
1615
1616
1617
1618 if (write_lock_level < BTRFS_MAX_LEVEL) {
1619
1620
1621
1622
1623 b = btrfs_read_lock_root_node(root);
1624 level = btrfs_header_level(b);
1625 if (level > write_lock_level)
1626 goto out;
1627
1628
1629 btrfs_tree_read_unlock(b);
1630 free_extent_buffer(b);
1631 }
1632
1633 b = btrfs_lock_root_node(root);
1634 root_lock = BTRFS_WRITE_LOCK;
1635
1636
1637 level = btrfs_header_level(b);
1638
1639out:
1640 p->nodes[level] = b;
1641 if (!p->skip_locking)
1642 p->locks[level] = root_lock;
1643
1644
1645
1646 return b;
1647}
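
/*
 * btrfs_search_slot - look for a key in a tree and perform necessary
 * modifications to preserve tree invariants.
 *
 * @trans:	Handle of transaction, used when modifying the tree
 * @p:		Holds all btree nodes along the search path
 * @root:	The root node of the tree
 * @key:	The key we are looking for
 * @ins_len:	Indicates purpose of search:
 *              >0  for inserts it's size of item inserted (*)
 *              <0  for deletions
 *               0  for plain searches, not modifying the tree
 *
 *              (*) If size of item inserted doesn't include
 *              sizeof(struct btrfs_item), then p->search_for_extension must
 *              be set.
 * @cow:	boolean should CoW operations be performed. Must always be 1
 *		when modifying the tree.
 *
 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible).
 *
 * If @key is found, 0 is returned and you can find the item in the leaf level
 * of the path (level 0).
 *
 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
 * points to the slot where it should be inserted.
 *
 * If an error is encountered while searching the tree a negative error number
 * is returned.
 */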
1681int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1682 const struct btrfs_key *key, struct btrfs_path *p,
1683 int ins_len, int cow)
1684{
1685 struct extent_buffer *b;
1686 int slot;
1687 int ret;
1688 int err;
1689 int level;
1690 int lowest_unlock = 1;
1691
1692 int write_lock_level = 0;
1693 u8 lowest_level = 0;
1694 int min_write_lock_level;
1695 int prev_cmp;
1696
1697 lowest_level = p->lowest_level;
1698 WARN_ON(lowest_level && ins_len > 0);
1699 WARN_ON(p->nodes[0] != NULL);
1700 BUG_ON(!cow && ins_len);
1701
1702 if (ins_len < 0) {
1703 lowest_unlock = 2;
1704
1705
1706
1707
1708
1709 write_lock_level = 2;
1710 } else if (ins_len > 0) {
1711
1712
1713
1714
1715 write_lock_level = 1;
1716 }
1717
1718 if (!cow)
1719 write_lock_level = -1;
1720
1721 if (cow && (p->keep_locks || p->lowest_level))
1722 write_lock_level = BTRFS_MAX_LEVEL;
1723
1724 min_write_lock_level = write_lock_level;
1725
1726again:
1727 prev_cmp = -1;
1728 b = btrfs_search_slot_get_root(root, p, write_lock_level);
1729 if (IS_ERR(b)) {
1730 ret = PTR_ERR(b);
1731 goto done;
1732 }
1733
1734 while (b) {
1735 int dec = 0;
1736
1737 level = btrfs_header_level(b);
1738
1739 if (cow) {
1740 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
1741
1742
1743
1744
1745
1746
1747 if (!should_cow_block(trans, root, b))
1748 goto cow_done;
1749
1750
1751
1752
1753
1754 if (level > write_lock_level ||
1755 (level + 1 > write_lock_level &&
1756 level + 1 < BTRFS_MAX_LEVEL &&
1757 p->nodes[level + 1])) {
1758 write_lock_level = level + 1;
1759 btrfs_release_path(p);
1760 goto again;
1761 }
1762
1763 if (last_level)
1764 err = btrfs_cow_block(trans, root, b, NULL, 0,
1765 &b,
1766 BTRFS_NESTING_COW);
1767 else
1768 err = btrfs_cow_block(trans, root, b,
1769 p->nodes[level + 1],
1770 p->slots[level + 1], &b,
1771 BTRFS_NESTING_COW);
1772 if (err) {
1773 ret = err;
1774 goto done;
1775 }
1776 }
1777cow_done:
1778 p->nodes[level] = b;
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795 if (!ins_len && !p->keep_locks) {
1796 int u = level + 1;
1797
1798 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
1799 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
1800 p->locks[u] = 0;
1801 }
1802 }

		/*
		 * If btrfs_bin_search returns an exact match (prev_cmp == 0)
		 * we can safely assume the target key will always be in slot 0
		 * on lower levels due to the invariants BTRFS' btree provides,
		 * namely that a btrfs_key_ptr entry always points to the
		 * lowest key in the child node, thus we can skip searching
		 * lower levels.
		 */
1812 if (prev_cmp == 0) {
1813 slot = 0;
1814 ret = 0;
1815 } else {
1816 ret = btrfs_bin_search(b, key, &slot);
1817 prev_cmp = ret;
1818 if (ret < 0)
1819 goto done;
1820 }
1821
1822 if (level == 0) {
1823 p->slots[level] = slot;

			/*
			 * Item key already exists. In this case, if we are
			 * allowed to insert the item (for example, in dir_item
			 * case, item key collision is allowed), it will be
			 * merged with the original item. Only the item size
			 * grows, no new btrfs item will be added. If
			 * search_for_extension is not set, ins_len already
			 * accounts for the size of btrfs_item, deduct it here
			 * so the leaf space check will be correct.
			 */
1834 if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
1835 ASSERT(ins_len >= sizeof(struct btrfs_item));
1836 ins_len -= sizeof(struct btrfs_item);
1837 }
1838 if (ins_len > 0 &&
1839 btrfs_leaf_free_space(b) < ins_len) {
1840 if (write_lock_level < 1) {
1841 write_lock_level = 1;
1842 btrfs_release_path(p);
1843 goto again;
1844 }
1845
1846 err = split_leaf(trans, root, key,
1847 p, ins_len, ret == 0);
1848
1849 BUG_ON(err > 0);
1850 if (err) {
1851 ret = err;
1852 goto done;
1853 }
1854 }
1855 if (!p->search_for_split)
1856 unlock_up(p, level, lowest_unlock,
1857 min_write_lock_level, NULL);
1858 goto done;
1859 }
1860 if (ret && slot > 0) {
1861 dec = 1;
1862 slot--;
1863 }
1864 p->slots[level] = slot;
1865 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
1866 &write_lock_level);
1867 if (err == -EAGAIN)
1868 goto again;
1869 if (err) {
1870 ret = err;
1871 goto done;
1872 }
1873 b = p->nodes[level];
1874 slot = p->slots[level];

		/*
		 * Slot 0 is special, if we change the key we have to update
		 * the parent pointer which means we must have a write lock on
		 * the parent.
		 */
1881 if (slot == 0 && ins_len && write_lock_level < level + 1) {
1882 write_lock_level = level + 1;
1883 btrfs_release_path(p);
1884 goto again;
1885 }
1886
1887 unlock_up(p, level, lowest_unlock, min_write_lock_level,
1888 &write_lock_level);
1889
1890 if (level == lowest_level) {
1891 if (dec)
1892 p->slots[level]++;
1893 goto done;
1894 }
1895
1896 err = read_block_for_search(root, p, &b, level, slot, key);
1897 if (err == -EAGAIN)
1898 goto again;
1899 if (err) {
1900 ret = err;
1901 goto done;
1902 }
1903
1904 if (!p->skip_locking) {
1905 level = btrfs_header_level(b);
1906 if (level <= write_lock_level) {
1907 btrfs_tree_lock(b);
1908 p->locks[level] = BTRFS_WRITE_LOCK;
1909 } else {
1910 btrfs_tree_read_lock(b);
1911 p->locks[level] = BTRFS_READ_LOCK;
1912 }
1913 p->nodes[level] = b;
1914 }
1915 }
1916 ret = 1;
1917done:
1918 if (ret < 0 && !p->skip_release_on_error)
1919 btrfs_release_path(p);
1920 return ret;
1921}
1922ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
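
/*
 * Like btrfs_search_slot, this looks for a key in the given tree. It uses
 * the current state of the tree together with the operations recorded in the
 * tree modification log to search for the key in a previous version of this
 * tree, as denoted by the time_seq parameter.
 *
 * Naturally, there is no support for insertion of new items in that tree or
 * deletion of items.
 */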
1935int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
1936 struct btrfs_path *p, u64 time_seq)
1937{
1938 struct btrfs_fs_info *fs_info = root->fs_info;
1939 struct extent_buffer *b;
1940 int slot;
1941 int ret;
1942 int err;
1943 int level;
1944 int lowest_unlock = 1;
1945 u8 lowest_level = 0;
1946
1947 lowest_level = p->lowest_level;
1948 WARN_ON(p->nodes[0] != NULL);
1949
1950 if (p->search_commit_root) {
1951 BUG_ON(time_seq);
1952 return btrfs_search_slot(NULL, root, key, p, 0, 0);
1953 }
1954
1955again:
1956 b = btrfs_get_old_root(root, time_seq);
1957 if (!b) {
1958 ret = -EIO;
1959 goto done;
1960 }
1961 level = btrfs_header_level(b);
1962 p->locks[level] = BTRFS_READ_LOCK;
1963
1964 while (b) {
1965 int dec = 0;
1966
1967 level = btrfs_header_level(b);
1968 p->nodes[level] = b;
1969
1970
1971
1972
1973
1974
1975
1976 btrfs_unlock_up_safe(p, level + 1);
1977
1978 ret = btrfs_bin_search(b, key, &slot);
1979 if (ret < 0)
1980 goto done;
1981
1982 if (level == 0) {
1983 p->slots[level] = slot;
1984 unlock_up(p, level, lowest_unlock, 0, NULL);
1985 goto done;
1986 }
1987
1988 if (ret && slot > 0) {
1989 dec = 1;
1990 slot--;
1991 }
1992 p->slots[level] = slot;
1993 unlock_up(p, level, lowest_unlock, 0, NULL);
1994
1995 if (level == lowest_level) {
1996 if (dec)
1997 p->slots[level]++;
1998 goto done;
1999 }
2000
2001 err = read_block_for_search(root, p, &b, level, slot, key);
2002 if (err == -EAGAIN)
2003 goto again;
2004 if (err) {
2005 ret = err;
2006 goto done;
2007 }
2008
2009 level = btrfs_header_level(b);
2010 btrfs_tree_read_lock(b);
2011 b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
2012 if (!b) {
2013 ret = -ENOMEM;
2014 goto done;
2015 }
2016 p->locks[level] = BTRFS_READ_LOCK;
2017 p->nodes[level] = b;
2018 }
2019 ret = 1;
2020done:
2021 if (ret < 0)
2022 btrfs_release_path(p);
2023
2024 return ret;
2025}
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039int btrfs_search_slot_for_read(struct btrfs_root *root,
2040 const struct btrfs_key *key,
2041 struct btrfs_path *p, int find_higher,
2042 int return_any)
2043{
2044 int ret;
2045 struct extent_buffer *leaf;
2046
2047again:
2048 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
2049 if (ret <= 0)
2050 return ret;

	/*
	 * A return value of 1 means the path is at the position where the item
	 * should be inserted. Normally this is the next bigger item, but in
	 * case the previous item is the last in a leaf, path points to the
	 * first free slot in the previous leaf, i.e. at an invalid item.
	 */
2058 leaf = p->nodes[0];
2059
2060 if (find_higher) {
2061 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
2062 ret = btrfs_next_leaf(root, p);
2063 if (ret <= 0)
2064 return ret;
2065 if (!return_any)
2066 return 1;
			/*
			 * No higher item found, return the next lower item
			 * instead.
			 */
2071 return_any = 0;
2072 find_higher = 0;
2073 btrfs_release_path(p);
2074 goto again;
2075 }
2076 } else {
2077 if (p->slots[0] == 0) {
2078 ret = btrfs_prev_leaf(root, p);
2079 if (ret < 0)
2080 return ret;
2081 if (!ret) {
2082 leaf = p->nodes[0];
2083 if (p->slots[0] == btrfs_header_nritems(leaf))
2084 p->slots[0]--;
2085 return 0;
2086 }
2087 if (!return_any)
2088 return 1;
			/*
			 * No lower item found, return the next higher item
			 * instead.
			 */
2093 return_any = 0;
2094 find_higher = 1;
2095 btrfs_release_path(p);
2096 goto again;
2097 } else {
2098 --p->slots[0];
2099 }
2100 }
2101 return 0;
2102}

/*
 * Search for @key, and if it is not found, walk back to the previous item
 * with the same objectid and type.  On success (return 0) the found key is
 * copied back into @key; returns 1 if nothing was found and < 0 on error.
 */
2110int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
2111 struct btrfs_path *path)
2112{
2113 int ret;
2114
2115 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2116 if (ret > 0)
2117 ret = btrfs_previous_item(root, path, key->objectid, key->type);
2118
2119 if (ret == 0)
2120 btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
2121
2122 return ret;
2123}

/*
 * Adjust the pointers going up the tree, starting at @level, making sure the
 * right key of each node points to @key.  This is used after shifting
 * pointers to the left, so it stops fixing up pointers when a given leaf or
 * node is not in slot 0 of the higher levels.
 */
2133static void fixup_low_keys(struct btrfs_path *path,
2134 struct btrfs_disk_key *key, int level)
2135{
2136 int i;
2137 struct extent_buffer *t;
2138 int ret;
2139
2140 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2141 int tslot = path->slots[i];
2142
2143 if (!path->nodes[i])
2144 break;
2145 t = path->nodes[i];
2146 ret = btrfs_tree_mod_log_insert_key(t, tslot,
2147 BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
2148 BUG_ON(ret < 0);
2149 btrfs_set_node_key(t, key, tslot);
2150 btrfs_mark_buffer_dirty(path->nodes[i]);
2151 if (tslot != 0)
2152 break;
2153 }
2154}

/*
 * Update an item key without moving the item.
 *
 * This function isn't completely safe. It's the caller's responsibility
 * that the new key won't break the key ordering of the leaf.
 */
2162void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
2163 struct btrfs_path *path,
2164 const struct btrfs_key *new_key)
2165{
2166 struct btrfs_disk_key disk_key;
2167 struct extent_buffer *eb;
2168 int slot;
2169
2170 eb = path->nodes[0];
2171 slot = path->slots[0];
2172 if (slot > 0) {
2173 btrfs_item_key(eb, &disk_key, slot - 1);
2174 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
2175 btrfs_crit(fs_info,
2176 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2177 slot, btrfs_disk_key_objectid(&disk_key),
2178 btrfs_disk_key_type(&disk_key),
2179 btrfs_disk_key_offset(&disk_key),
2180 new_key->objectid, new_key->type,
2181 new_key->offset);
2182 btrfs_print_leaf(eb);
2183 BUG();
2184 }
2185 }
2186 if (slot < btrfs_header_nritems(eb) - 1) {
2187 btrfs_item_key(eb, &disk_key, slot + 1);
2188 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
2189 btrfs_crit(fs_info,
2190 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2191 slot, btrfs_disk_key_objectid(&disk_key),
2192 btrfs_disk_key_type(&disk_key),
2193 btrfs_disk_key_offset(&disk_key),
2194 new_key->objectid, new_key->type,
2195 new_key->offset);
2196 btrfs_print_leaf(eb);
2197 BUG();
2198 }
2199 }
2200
2201 btrfs_cpu_key_to_disk(&disk_key, new_key);
2202 btrfs_set_item_key(eb, &disk_key, slot);
2203 btrfs_mark_buffer_dirty(eb);
2204 if (slot == 0)
2205 fixup_low_keys(path, &disk_key, 1);
2206}
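
/*
 * Check key order of two sibling extent buffers.
 *
 * Return true if something is wrong.
 * Return false if everything is fine.
 *
 * The tree-checker only works inside one tree block, thus a cross-block key
 * ordering problem (last key of @left >= first key of @right) can only be
 * caught here.
 */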
2228static bool check_sibling_keys(struct extent_buffer *left,
2229 struct extent_buffer *right)
2230{
2231 struct btrfs_key left_last;
2232 struct btrfs_key right_first;
2233 int level = btrfs_header_level(left);
2234 int nr_left = btrfs_header_nritems(left);
2235 int nr_right = btrfs_header_nritems(right);

	/* No key to check in one of the tree blocks */
2238 if (!nr_left || !nr_right)
2239 return false;
2240
2241 if (level) {
2242 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
2243 btrfs_node_key_to_cpu(right, &right_first, 0);
2244 } else {
2245 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
2246 btrfs_item_key_to_cpu(right, &right_first, 0);
2247 }
2248
2249 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
2250 btrfs_crit(left->fs_info,
2251"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
2252 left_last.objectid, left_last.type,
2253 left_last.offset, right_first.objectid,
2254 right_first.type, right_first.offset);
2255 return true;
2256 }
2257 return false;
2258}
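
/*
 * Try to push data from one node into the next node left in the tree.
 *
 * Returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
 */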
2267static int push_node_left(struct btrfs_trans_handle *trans,
2268 struct extent_buffer *dst,
2269 struct extent_buffer *src, int empty)
2270{
2271 struct btrfs_fs_info *fs_info = trans->fs_info;
2272 int push_items = 0;
2273 int src_nritems;
2274 int dst_nritems;
2275 int ret = 0;
2276
2277 src_nritems = btrfs_header_nritems(src);
2278 dst_nritems = btrfs_header_nritems(dst);
2279 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2280 WARN_ON(btrfs_header_generation(src) != trans->transid);
2281 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2282
2283 if (!empty && src_nritems <= 8)
2284 return 1;
2285
2286 if (push_items <= 0)
2287 return 1;
2288
2289 if (empty) {
2290 push_items = min(src_nritems, push_items);
2291 if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
2295 if (src_nritems - push_items < 8) {
2296 if (push_items <= 8)
2297 return 1;
2298 push_items -= 8;
2299 }
2300 }
2301 } else
2302 push_items = min(src_nritems - 8, push_items);
2303
2304
2305 if (check_sibling_keys(dst, src)) {
2306 ret = -EUCLEAN;
2307 btrfs_abort_transaction(trans, ret);
2308 return ret;
2309 }
2310 ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
2311 if (ret) {
2312 btrfs_abort_transaction(trans, ret);
2313 return ret;
2314 }
2315 copy_extent_buffer(dst, src,
2316 btrfs_node_key_ptr_offset(dst_nritems),
2317 btrfs_node_key_ptr_offset(0),
2318 push_items * sizeof(struct btrfs_key_ptr));
2319
2320 if (push_items < src_nritems) {
		/*
		 * Don't call btrfs_tree_mod_log_insert_move() here, the key
		 * removal was already fully logged by
		 * btrfs_tree_mod_log_eb_copy() above.
		 */
2325 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2326 btrfs_node_key_ptr_offset(push_items),
2327 (src_nritems - push_items) *
2328 sizeof(struct btrfs_key_ptr));
2329 }
2330 btrfs_set_header_nritems(src, src_nritems - push_items);
2331 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2332 btrfs_mark_buffer_dirty(src);
2333 btrfs_mark_buffer_dirty(dst);
2334
2335 return ret;
2336}

/*
 * Try to push data from one node into the next node right in the tree.
 *
 * Returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * This will only push up to 1/2 the contents of the left node over.
 */
2347static int balance_node_right(struct btrfs_trans_handle *trans,
2348 struct extent_buffer *dst,
2349 struct extent_buffer *src)
2350{
2351 struct btrfs_fs_info *fs_info = trans->fs_info;
2352 int push_items = 0;
2353 int max_push;
2354 int src_nritems;
2355 int dst_nritems;
2356 int ret = 0;
2357
2358 WARN_ON(btrfs_header_generation(src) != trans->transid);
2359 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2360
2361 src_nritems = btrfs_header_nritems(src);
2362 dst_nritems = btrfs_header_nritems(dst);
2363 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2364 if (push_items <= 0)
2365 return 1;
2366
2367 if (src_nritems < 4)
2368 return 1;
2369
2370 max_push = src_nritems / 2 + 1;
2371
2372 if (max_push >= src_nritems)
2373 return 1;
2374
2375 if (max_push < push_items)
2376 push_items = max_push;
2377
2378
2379 if (check_sibling_keys(src, dst)) {
2380 ret = -EUCLEAN;
2381 btrfs_abort_transaction(trans, ret);
2382 return ret;
2383 }
2384 ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
2385 BUG_ON(ret < 0);
2386 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2387 btrfs_node_key_ptr_offset(0),
2388 (dst_nritems) *
2389 sizeof(struct btrfs_key_ptr));
2390
2391 ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
2392 push_items);
2393 if (ret) {
2394 btrfs_abort_transaction(trans, ret);
2395 return ret;
2396 }
2397 copy_extent_buffer(dst, src,
2398 btrfs_node_key_ptr_offset(0),
2399 btrfs_node_key_ptr_offset(src_nritems - push_items),
2400 push_items * sizeof(struct btrfs_key_ptr));
2401
2402 btrfs_set_header_nritems(src, src_nritems - push_items);
2403 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2404
2405 btrfs_mark_buffer_dirty(src);
2406 btrfs_mark_buffer_dirty(dst);
2407
2408 return ret;
2409}
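
/*
 * Helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root.
 *
 * Returns zero on success or < 0 on failure.
 */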
2418static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2419 struct btrfs_root *root,
2420 struct btrfs_path *path, int level)
2421{
2422 struct btrfs_fs_info *fs_info = root->fs_info;
2423 u64 lower_gen;
2424 struct extent_buffer *lower;
2425 struct extent_buffer *c;
2426 struct extent_buffer *old;
2427 struct btrfs_disk_key lower_key;
2428 int ret;
2429
2430 BUG_ON(path->nodes[level]);
2431 BUG_ON(path->nodes[level-1] != root->node);
2432
2433 lower = path->nodes[level-1];
2434 if (level == 1)
2435 btrfs_item_key(lower, &lower_key, 0);
2436 else
2437 btrfs_node_key(lower, &lower_key, 0);
2438
2439 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2440 &lower_key, level, root->node->start, 0,
2441 BTRFS_NESTING_NEW_ROOT);
2442 if (IS_ERR(c))
2443 return PTR_ERR(c);
2444
2445 root_add_used(root, fs_info->nodesize);
2446
2447 btrfs_set_header_nritems(c, 1);
2448 btrfs_set_node_key(c, &lower_key, 0);
2449 btrfs_set_node_blockptr(c, 0, lower->start);
2450 lower_gen = btrfs_header_generation(lower);
2451 WARN_ON(lower_gen != trans->transid);
2452
2453 btrfs_set_node_ptr_generation(c, 0, lower_gen);
2454
2455 btrfs_mark_buffer_dirty(c);
2456
2457 old = root->node;
2458 ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
2459 BUG_ON(ret < 0);
2460 rcu_assign_pointer(root->node, c);

	/* the super has an extra ref to root->node */
2463 free_extent_buffer(old);
2464
2465 add_root_to_dirty_list(root);
2466 atomic_inc(&c->refs);
2467 path->nodes[level] = c;
2468 path->locks[level] = BTRFS_WRITE_LOCK;
2469 path->slots[level] = 0;
2470 return 0;
2471}

/*
 * Worker function to insert a single pointer in a node.
 * The node should have enough room for the pointer already.
 *
 * @slot and @level indicate where you want the key to go, and
 * should point to the slot in the node where the new pointer
 * should be inserted.
 */
2480static void insert_ptr(struct btrfs_trans_handle *trans,
2481 struct btrfs_path *path,
2482 struct btrfs_disk_key *key, u64 bytenr,
2483 int slot, int level)
2484{
2485 struct extent_buffer *lower;
2486 int nritems;
2487 int ret;
2488
2489 BUG_ON(!path->nodes[level]);
2490 btrfs_assert_tree_locked(path->nodes[level]);
2491 lower = path->nodes[level];
2492 nritems = btrfs_header_nritems(lower);
2493 BUG_ON(slot > nritems);
2494 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
2495 if (slot != nritems) {
2496 if (level) {
2497 ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
2498 slot, nritems - slot);
2499 BUG_ON(ret < 0);
2500 }
2501 memmove_extent_buffer(lower,
2502 btrfs_node_key_ptr_offset(slot + 1),
2503 btrfs_node_key_ptr_offset(slot),
2504 (nritems - slot) * sizeof(struct btrfs_key_ptr));
2505 }
2506 if (level) {
2507 ret = btrfs_tree_mod_log_insert_key(lower, slot,
2508 BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
2509 BUG_ON(ret < 0);
2510 }
2511 btrfs_set_node_key(lower, key, slot);
2512 btrfs_set_node_blockptr(lower, slot, bytenr);
2513 WARN_ON(trans->transid == 0);
2514 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2515 btrfs_set_header_nritems(lower, nritems + 1);
2516 btrfs_mark_buffer_dirty(lower);
2517}

/*
 * Split the node at the specified level in @path in two.
 * The path is corrected to point to the appropriate node after the split.
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 *
 * Returns 0 on success and < 0 on failure.
 */
2528static noinline int split_node(struct btrfs_trans_handle *trans,
2529 struct btrfs_root *root,
2530 struct btrfs_path *path, int level)
2531{
2532 struct btrfs_fs_info *fs_info = root->fs_info;
2533 struct extent_buffer *c;
2534 struct extent_buffer *split;
2535 struct btrfs_disk_key disk_key;
2536 int mid;
2537 int ret;
2538 u32 c_nritems;
2539
2540 c = path->nodes[level];
2541 WARN_ON(btrfs_header_generation(c) != trans->transid);
2542 if (c == root->node) {
		/*
		 * trying to split the root, lets make a new one
		 *
		 * tree mod log: We don't log removal of the old root in
		 * insert_new_root, because that root buffer will be kept as a
		 * normal node. We are going to log removal of half of the
		 * elements below with btrfs_tree_mod_log_eb_copy(). We're
		 * holding a tree lock on the buffer, which is why we cannot
		 * race with other tree_mod_log users.
		 */
2553 ret = insert_new_root(trans, root, path, level + 1);
2554 if (ret)
2555 return ret;
2556 } else {
2557 ret = push_nodes_for_insert(trans, root, path, level);
2558 c = path->nodes[level];
2559 if (!ret && btrfs_header_nritems(c) <
2560 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
2561 return 0;
2562 if (ret < 0)
2563 return ret;
2564 }
2565
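	/*
	 * Split roughly in half: the upper half of the pointers moves to the
	 * new 'split' node, which is then linked into the parent right after
	 * 'c'.
	 */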
2566 c_nritems = btrfs_header_nritems(c);
2567 mid = (c_nritems + 1) / 2;
2568 btrfs_node_key(c, &disk_key, mid);
2569
2570 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2571 &disk_key, level, c->start, 0,
2572 BTRFS_NESTING_SPLIT);
2573 if (IS_ERR(split))
2574 return PTR_ERR(split);
2575
2576 root_add_used(root, fs_info->nodesize);
2577 ASSERT(btrfs_header_level(c) == level);
2578
2579 ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
2580 if (ret) {
2581 btrfs_abort_transaction(trans, ret);
2582 return ret;
2583 }
2584 copy_extent_buffer(split, c,
2585 btrfs_node_key_ptr_offset(0),
2586 btrfs_node_key_ptr_offset(mid),
2587 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2588 btrfs_set_header_nritems(split, c_nritems - mid);
2589 btrfs_set_header_nritems(c, mid);
2590
2591 btrfs_mark_buffer_dirty(c);
2592 btrfs_mark_buffer_dirty(split);
2593
2594 insert_ptr(trans, path, &disk_key, split->start,
2595 path->slots[level + 1] + 1, level + 1);
2596
2597 if (path->slots[level] >= mid) {
2598 path->slots[level] -= mid;
2599 btrfs_tree_unlock(c);
2600 free_extent_buffer(c);
2601 path->nodes[level] = split;
2602 path->slots[level + 1] += 1;
2603 } else {
2604 btrfs_tree_unlock(split);
2605 free_extent_buffer(split);
2606 }
2607 return 0;
2608}
2609
/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
2615static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2616{
2617 struct btrfs_item *start_item;
2618 struct btrfs_item *end_item;
2619 int data_len;
2620 int nritems = btrfs_header_nritems(l);
2621 int end = min(nritems, start + nr) - 1;
2622
2623 if (!nr)
2624 return 0;
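	/*
	 * Item data grows down from the end of the leaf, so the data for
	 * items [start, end] runs from end_item's offset up to start_item's
	 * offset + size; the struct btrfs_item headers are counted on top.
	 */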
2625 start_item = btrfs_item_nr(start);
2626 end_item = btrfs_item_nr(end);
2627 data_len = btrfs_item_offset(l, start_item) +
2628 btrfs_item_size(l, start_item);
2629 data_len = data_len - btrfs_item_offset(l, end_item);
2630 data_len += sizeof(struct btrfs_item) * nr;
2631 WARN_ON(data_len < 0);
2632 return data_len;
2633}
2634
/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
2640noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
2641{
2642 struct btrfs_fs_info *fs_info = leaf->fs_info;
2643 int nritems = btrfs_header_nritems(leaf);
2644 int ret;
2645
2646 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
2647 if (ret < 0) {
2648 btrfs_crit(fs_info,
2649 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
2650 ret,
2651 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
2652 leaf_space_used(leaf, 0, nritems), nritems);
2653 }
2654 return ret;
2655}
2656
/*
 * min slot controls the lowest index we're willing to push to the
 * right.  We'll push up to and including min_slot, but no lower
 */
2661static noinline int __push_leaf_right(struct btrfs_path *path,
2662 int data_size, int empty,
2663 struct extent_buffer *right,
2664 int free_space, u32 left_nritems,
2665 u32 min_slot)
2666{
2667 struct btrfs_fs_info *fs_info = right->fs_info;
2668 struct extent_buffer *left = path->nodes[0];
2669 struct extent_buffer *upper = path->nodes[1];
2670 struct btrfs_map_token token;
2671 struct btrfs_disk_key disk_key;
2672 int slot;
2673 u32 i;
2674 int push_space = 0;
2675 int push_items = 0;
2676 struct btrfs_item *item;
2677 u32 nr;
2678 u32 right_nritems;
2679 u32 data_end;
2680 u32 this_item_size;
2681
2682 if (empty)
2683 nr = 0;
2684 else
2685 nr = max_t(u32, 1, min_slot);
2686
2687 if (path->slots[0] >= left_nritems)
2688 push_space += data_size;
2689
2690 slot = path->slots[1];
2691 i = left_nritems - 1;
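	/*
	 * Walk backwards from the last item in the left leaf, accumulating
	 * items that still fit into the right leaf's free space.  Never go
	 * below slot 'nr', and stop early rather than push away the slot the
	 * caller is about to insert into.
	 */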
2692 while (i >= nr) {
2693 item = btrfs_item_nr(i);
2694
2695 if (!empty && push_items > 0) {
2696 if (path->slots[0] > i)
2697 break;
2698 if (path->slots[0] == i) {
2699 int space = btrfs_leaf_free_space(left);
2700
2701 if (space + push_space * 2 > free_space)
2702 break;
2703 }
2704 }
2705
2706 if (path->slots[0] == i)
2707 push_space += data_size;
2708
2709 this_item_size = btrfs_item_size(left, item);
2710 if (this_item_size + sizeof(*item) + push_space > free_space)
2711 break;
2712
2713 push_items++;
2714 push_space += this_item_size + sizeof(*item);
2715 if (i == 0)
2716 break;
2717 i--;
2718 }
2719
2720 if (push_items == 0)
2721 goto out_unlock;
2722
2723 WARN_ON(!empty && push_items == left_nritems);

	/* push left to right */
2726 right_nritems = btrfs_header_nritems(right);
2727
2728 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2729 push_space -= leaf_data_end(left);
2730
	/* make room in the right data area for the items from the left */
2732 data_end = leaf_data_end(right);
2733 memmove_extent_buffer(right,
2734 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
2735 BTRFS_LEAF_DATA_OFFSET + data_end,
2736 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);

	/* copy from the left data area */
2739 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
2740 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
2741 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
2742 push_space);
2743
2744 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2745 btrfs_item_nr_offset(0),
2746 right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
2749 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2750 btrfs_item_nr_offset(left_nritems - push_items),
2751 push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
2754 btrfs_init_map_token(&token, right);
2755 right_nritems += push_items;
2756 btrfs_set_header_nritems(right, right_nritems);
2757 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
2758 for (i = 0; i < right_nritems; i++) {
2759 item = btrfs_item_nr(i);
2760 push_space -= btrfs_token_item_size(&token, item);
2761 btrfs_set_token_item_offset(&token, item, push_space);
2762 }
2763
2764 left_nritems -= push_items;
2765 btrfs_set_header_nritems(left, left_nritems);
2766
2767 if (left_nritems)
2768 btrfs_mark_buffer_dirty(left);
2769 else
2770 btrfs_clean_tree_block(left);
2771
2772 btrfs_mark_buffer_dirty(right);
2773
2774 btrfs_item_key(right, &disk_key, 0);
2775 btrfs_set_node_key(upper, &disk_key, slot + 1);
2776 btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
2779 if (path->slots[0] >= left_nritems) {
2780 path->slots[0] -= left_nritems;
2781 if (btrfs_header_nritems(path->nodes[0]) == 0)
2782 btrfs_clean_tree_block(path->nodes[0]);
2783 btrfs_tree_unlock(path->nodes[0]);
2784 free_extent_buffer(path->nodes[0]);
2785 path->nodes[0] = right;
2786 path->slots[1] += 1;
2787 } else {
2788 btrfs_tree_unlock(right);
2789 free_extent_buffer(right);
2790 }
2791 return 0;
2792
2793out_unlock:
2794 btrfs_tree_unlock(right);
2795 free_extent_buffer(right);
2796 return 1;
2797}

/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
 *
 * this will push starting from min_slot to the end of the leaf.  It won't
 * push any slot lower than min_slot
 */
2809static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2810 *root, struct btrfs_path *path,
2811 int min_data_size, int data_size,
2812 int empty, u32 min_slot)
2813{
2814 struct extent_buffer *left = path->nodes[0];
2815 struct extent_buffer *right;
2816 struct extent_buffer *upper;
2817 int slot;
2818 int free_space;
2819 u32 left_nritems;
2820 int ret;
2821
2822 if (!path->nodes[1])
2823 return 1;
2824
2825 slot = path->slots[1];
2826 upper = path->nodes[1];
2827 if (slot >= btrfs_header_nritems(upper) - 1)
2828 return 1;
2829
2830 btrfs_assert_tree_locked(path->nodes[1]);
2831
2832 right = btrfs_read_node_slot(upper, slot + 1);
	/*
	 * slot + 1 is not valid or we fail to read the right node,
	 * no big deal, just return.
	 */
2837 if (IS_ERR(right))
2838 return 1;
2839
2840 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
2841
2842 free_space = btrfs_leaf_free_space(right);
2843 if (free_space < data_size)
2844 goto out_unlock;
2845
	/* cow and double check */
2847 ret = btrfs_cow_block(trans, root, right, upper,
2848 slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
2849 if (ret)
2850 goto out_unlock;
2851
2852 free_space = btrfs_leaf_free_space(right);
2853 if (free_space < data_size)
2854 goto out_unlock;
2855
2856 left_nritems = btrfs_header_nritems(left);
2857 if (left_nritems == 0)
2858 goto out_unlock;
2859
2860 if (check_sibling_keys(left, right)) {
2861 ret = -EUCLEAN;
2862 btrfs_tree_unlock(right);
2863 free_extent_buffer(right);
2864 return ret;
2865 }
2866 if (path->slots[0] == left_nritems && !empty) {
		/* Key greater than all keys in the leaf, right neighbor has
		 * enough room for it and we're not emptying our leaf to delete
		 * it, therefore use right neighbor to insert the new item and
		 * no need to touch/dirty our left leaf. */
2871 btrfs_tree_unlock(left);
2872 free_extent_buffer(left);
2873 path->nodes[0] = right;
2874 path->slots[0] = 0;
2875 path->slots[1]++;
2876 return 0;
2877 }
2878
2879 return __push_leaf_right(path, min_data_size, empty,
2880 right, free_space, left_nritems, min_slot);
2881out_unlock:
2882 btrfs_tree_unlock(right);
2883 free_extent_buffer(right);
2884 return 1;
2885}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items.  The
 * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
 * items
 */
2895static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
2896 int empty, struct extent_buffer *left,
2897 int free_space, u32 right_nritems,
2898 u32 max_slot)
2899{
2900 struct btrfs_fs_info *fs_info = left->fs_info;
2901 struct btrfs_disk_key disk_key;
2902 struct extent_buffer *right = path->nodes[0];
2903 int i;
2904 int push_space = 0;
2905 int push_items = 0;
2906 struct btrfs_item *item;
2907 u32 old_left_nritems;
2908 u32 nr;
2909 int ret = 0;
2910 u32 this_item_size;
2911 u32 old_left_item_size;
2912 struct btrfs_map_token token;
2913
2914 if (empty)
2915 nr = min(right_nritems, max_slot);
2916 else
2917 nr = min(right_nritems - 1, max_slot);
2918
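	/*
	 * Walk forward from slot 0 of the right leaf, counting how many items
	 * fit into the left leaf's free space without going past max_slot or
	 * pushing away the slot the caller is about to insert into.
	 */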
2919 for (i = 0; i < nr; i++) {
2920 item = btrfs_item_nr(i);
2921
2922 if (!empty && push_items > 0) {
2923 if (path->slots[0] < i)
2924 break;
2925 if (path->slots[0] == i) {
2926 int space = btrfs_leaf_free_space(right);
2927
2928 if (space + push_space * 2 > free_space)
2929 break;
2930 }
2931 }
2932
2933 if (path->slots[0] == i)
2934 push_space += data_size;
2935
2936 this_item_size = btrfs_item_size(right, item);
2937 if (this_item_size + sizeof(*item) + push_space > free_space)
2938 break;
2939
2940 push_items++;
2941 push_space += this_item_size + sizeof(*item);
2942 }
2943
2944 if (push_items == 0) {
2945 ret = 1;
2946 goto out;
2947 }
2948 WARN_ON(!empty && push_items == btrfs_header_nritems(right));

	/* push data from right to left */
2951 copy_extent_buffer(left, right,
2952 btrfs_item_nr_offset(btrfs_header_nritems(left)),
2953 btrfs_item_nr_offset(0),
2954 push_items * sizeof(struct btrfs_item));
2955
2956 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
2957 btrfs_item_offset_nr(right, push_items - 1);
2958
2959 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
2960 leaf_data_end(left) - push_space,
2961 BTRFS_LEAF_DATA_OFFSET +
2962 btrfs_item_offset_nr(right, push_items - 1),
2963 push_space);
2964 old_left_nritems = btrfs_header_nritems(left);
2965 BUG_ON(old_left_nritems <= 0);
2966
2967 btrfs_init_map_token(&token, left);
2968 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2969 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2970 u32 ioff;
2971
2972 item = btrfs_item_nr(i);
2973
2974 ioff = btrfs_token_item_offset(&token, item);
2975 btrfs_set_token_item_offset(&token, item,
2976 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
2977 }
2978 btrfs_set_header_nritems(left, old_left_nritems + push_items);

	/* fixup right node */
2981 if (push_items > right_nritems)
2982 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
2983 right_nritems);
2984
2985 if (push_items < right_nritems) {
2986 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2987 leaf_data_end(right);
2988 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
2989 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
2990 BTRFS_LEAF_DATA_OFFSET +
2991 leaf_data_end(right), push_space);
2992
2993 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2994 btrfs_item_nr_offset(push_items),
2995 (btrfs_header_nritems(right) - push_items) *
2996 sizeof(struct btrfs_item));
2997 }
2998
2999 btrfs_init_map_token(&token, right);
3000 right_nritems -= push_items;
3001 btrfs_set_header_nritems(right, right_nritems);
3002 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
3003 for (i = 0; i < right_nritems; i++) {
3004 item = btrfs_item_nr(i);
3005
3006 push_space = push_space - btrfs_token_item_size(&token, item);
3007 btrfs_set_token_item_offset(&token, item, push_space);
3008 }
3009
3010 btrfs_mark_buffer_dirty(left);
3011 if (right_nritems)
3012 btrfs_mark_buffer_dirty(right);
3013 else
3014 btrfs_clean_tree_block(right);
3015
3016 btrfs_item_key(right, &disk_key, 0);
3017 fixup_low_keys(path, &disk_key, 1);

	/* then fixup the leaf pointer in the path */
3020 if (path->slots[0] < push_items) {
3021 path->slots[0] += old_left_nritems;
3022 btrfs_tree_unlock(path->nodes[0]);
3023 free_extent_buffer(path->nodes[0]);
3024 path->nodes[0] = left;
3025 path->slots[1] -= 1;
3026 } else {
3027 btrfs_tree_unlock(left);
3028 free_extent_buffer(left);
3029 path->slots[0] -= push_items;
3030 }
3031 BUG_ON(path->slots[0] < 0);
3032 return ret;
3033out:
3034 btrfs_tree_unlock(left);
3035 free_extent_buffer(left);
3036 return ret;
3037}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items.  The
 * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
 * items
 */
3047static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3048 *root, struct btrfs_path *path, int min_data_size,
3049 int data_size, int empty, u32 max_slot)
3050{
3051 struct extent_buffer *right = path->nodes[0];
3052 struct extent_buffer *left;
3053 int slot;
3054 int free_space;
3055 u32 right_nritems;
3056 int ret = 0;
3057
3058 slot = path->slots[1];
3059 if (slot == 0)
3060 return 1;
3061 if (!path->nodes[1])
3062 return 1;
3063
3064 right_nritems = btrfs_header_nritems(right);
3065 if (right_nritems == 0)
3066 return 1;
3067
3068 btrfs_assert_tree_locked(path->nodes[1]);
3069
3070 left = btrfs_read_node_slot(path->nodes[1], slot - 1);
3071
3072
3073
3074
3075 if (IS_ERR(left))
3076 return 1;
3077
3078 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
3079
3080 free_space = btrfs_leaf_free_space(left);
3081 if (free_space < data_size) {
3082 ret = 1;
3083 goto out;
3084 }
3085
	/* cow and double check */
3087 ret = btrfs_cow_block(trans, root, left,
3088 path->nodes[1], slot - 1, &left,
3089 BTRFS_NESTING_LEFT_COW);
3090 if (ret) {
3091
3092 if (ret == -ENOSPC)
3093 ret = 1;
3094 goto out;
3095 }
3096
3097 free_space = btrfs_leaf_free_space(left);
3098 if (free_space < data_size) {
3099 ret = 1;
3100 goto out;
3101 }
3102
3103 if (check_sibling_keys(left, right)) {
3104 ret = -EUCLEAN;
3105 goto out;
3106 }
3107 return __push_leaf_left(path, min_data_size,
3108 empty, left, free_space, right_nritems,
3109 max_slot);
3110out:
3111 btrfs_tree_unlock(left);
3112 free_extent_buffer(left);
3113 return ret;
3114}

/*
 * Copy the items from slot 'mid' to the end of leaf 'l' into the new
 * (empty) leaf 'right', link 'right' into the parent node and fix up
 * the path to point at whichever leaf now contains path->slots[0].
 */
3120static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3121 struct btrfs_path *path,
3122 struct extent_buffer *l,
3123 struct extent_buffer *right,
3124 int slot, int mid, int nritems)
3125{
3126 struct btrfs_fs_info *fs_info = trans->fs_info;
3127 int data_copy_size;
3128 int rt_data_off;
3129 int i;
3130 struct btrfs_disk_key disk_key;
3131 struct btrfs_map_token token;
3132
3133 nritems = nritems - mid;
3134 btrfs_set_header_nritems(right, nritems);
3135 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
3136
3137 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
3138 btrfs_item_nr_offset(mid),
3139 nritems * sizeof(struct btrfs_item));
3140
3141 copy_extent_buffer(right, l,
3142 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
3143 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
3144 leaf_data_end(l), data_copy_size);
3145
3146 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
3147
3148 btrfs_init_map_token(&token, right);
3149 for (i = 0; i < nritems; i++) {
3150 struct btrfs_item *item = btrfs_item_nr(i);
3151 u32 ioff;
3152
3153 ioff = btrfs_token_item_offset(&token, item);
3154 btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
3155 }
3156
3157 btrfs_set_header_nritems(l, mid);
3158 btrfs_item_key(right, &disk_key, 0);
3159 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
3160
3161 btrfs_mark_buffer_dirty(right);
3162 btrfs_mark_buffer_dirty(l);
3163 BUG_ON(path->slots[0] != slot);
3164
3165 if (mid <= slot) {
3166 btrfs_tree_unlock(path->nodes[0]);
3167 free_extent_buffer(path->nodes[0]);
3168 path->nodes[0] = right;
3169 path->slots[0] -= mid;
3170 path->slots[1] += 1;
3171 } else {
3172 btrfs_tree_unlock(right);
3173 free_extent_buffer(right);
3174 }
3175
3176 BUG_ON(path->slots[0] < 0);
3177}

/*
 * double splits happen when we need to insert a big item in the middle
 * of a leaf.  A double split can leave us with 3 mostly empty leaves:
 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
 *          A                 B                 C
 *
 * We avoid this by trying to push the items on either side of our target
 * into the adjacent leaves.  If all goes well we can avoid the double split
 * completely.
 */
3189static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3190 struct btrfs_root *root,
3191 struct btrfs_path *path,
3192 int data_size)
3193{
3194 int ret;
3195 int progress = 0;
3196 int slot;
3197 u32 nritems;
3198 int space_needed = data_size;
3199
3200 slot = path->slots[0];
3201 if (slot < btrfs_header_nritems(path->nodes[0]))
3202 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
3203
	/*
	 * try to push all the items after our slot into the
	 * right leaf
	 */
3208 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
3209 if (ret < 0)
3210 return ret;
3211
3212 if (ret == 0)
3213 progress++;
3214
3215 nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * our goal is to get our slot to the start or end of a leaf.  If
	 * we got there, the double split is no longer needed and the caller
	 * can split right at our slot.
	 */
3220 if (path->slots[0] == 0 || path->slots[0] == nritems)
3221 return 0;
3222
3223 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
3224 return 0;
3225
	/* try to push all the items before our slot into the left leaf */
3227 slot = path->slots[0];
3228 space_needed = data_size;
3229 if (slot > 0)
3230 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
3231 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
3232 if (ret < 0)
3233 return ret;
3234
3235 if (ret == 0)
3236 progress++;
3237
3238 if (progress)
3239 return 0;
3240 return 1;
3241}

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
3249static noinline int split_leaf(struct btrfs_trans_handle *trans,
3250 struct btrfs_root *root,
3251 const struct btrfs_key *ins_key,
3252 struct btrfs_path *path, int data_size,
3253 int extend)
3254{
3255 struct btrfs_disk_key disk_key;
3256 struct extent_buffer *l;
3257 u32 nritems;
3258 int mid;
3259 int slot;
3260 struct extent_buffer *right;
3261 struct btrfs_fs_info *fs_info = root->fs_info;
3262 int ret = 0;
3263 int wret;
3264 int split;
3265 int num_doubles = 0;
3266 int tried_avoid_double = 0;
3267
3268 l = path->nodes[0];
3269 slot = path->slots[0];
3270 if (extend && data_size + btrfs_item_size_nr(l, slot) +
3271 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
3272 return -EOVERFLOW;
3273
	/* first try to make some room by pushing left and right */
3275 if (data_size && path->nodes[1]) {
3276 int space_needed = data_size;
3277
3278 if (slot < btrfs_header_nritems(l))
3279 space_needed -= btrfs_leaf_free_space(l);
3280
3281 wret = push_leaf_right(trans, root, path, space_needed,
3282 space_needed, 0, 0);
3283 if (wret < 0)
3284 return wret;
3285 if (wret) {
3286 space_needed = data_size;
3287 if (slot > 0)
3288 space_needed -= btrfs_leaf_free_space(l);
3289 wret = push_leaf_left(trans, root, path, space_needed,
3290 space_needed, 0, (u32)-1);
3291 if (wret < 0)
3292 return wret;
3293 }
3294 l = path->nodes[0];

		/* did the pushing work? */
3297 if (btrfs_leaf_free_space(l) >= data_size)
3298 return 0;
3299 }
3300
3301 if (!path->nodes[1]) {
3302 ret = insert_new_root(trans, root, path, 1);
3303 if (ret)
3304 return ret;
3305 }
3306again:
3307 split = 1;
3308 l = path->nodes[0];
3309 slot = path->slots[0];
3310 nritems = btrfs_header_nritems(l);
3311 mid = (nritems + 1) / 2;
3312
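	/*
	 * Pick the split point: prefer the middle of the leaf, but if the new
	 * item would not fit into the resulting half, split right at the
	 * insertion slot instead.  split == 0 means the new item gets a brand
	 * new leaf of its own, split == 2 means a second split may be needed.
	 */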
3313 if (mid <= slot) {
3314 if (nritems == 1 ||
3315 leaf_space_used(l, mid, nritems - mid) + data_size >
3316 BTRFS_LEAF_DATA_SIZE(fs_info)) {
3317 if (slot >= nritems) {
3318 split = 0;
3319 } else {
3320 mid = slot;
3321 if (mid != nritems &&
3322 leaf_space_used(l, mid, nritems - mid) +
3323 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
3324 if (data_size && !tried_avoid_double)
3325 goto push_for_double;
3326 split = 2;
3327 }
3328 }
3329 }
3330 } else {
3331 if (leaf_space_used(l, 0, mid) + data_size >
3332 BTRFS_LEAF_DATA_SIZE(fs_info)) {
3333 if (!extend && data_size && slot == 0) {
3334 split = 0;
3335 } else if ((extend || !data_size) && slot == 0) {
3336 mid = 1;
3337 } else {
3338 mid = slot;
3339 if (mid != nritems &&
3340 leaf_space_used(l, mid, nritems - mid) +
3341 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
3342 if (data_size && !tried_avoid_double)
3343 goto push_for_double;
3344 split = 2;
3345 }
3346 }
3347 }
3348 }
3349
3350 if (split == 0)
3351 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3352 else
3353 btrfs_item_key(l, &disk_key, mid);
3354
	/*
	 * We have to use BTRFS_NESTING_NEW_ROOT here if we've done a double
	 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
	 * lockdep subclasses (8), and we've maxed them out.  In the future we
	 * could add a BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but
	 * for now just use BTRFS_NESTING_NEW_ROOT.
	 */
3363 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3364 &disk_key, 0, l->start, 0,
3365 num_doubles ? BTRFS_NESTING_NEW_ROOT :
3366 BTRFS_NESTING_SPLIT);
3367 if (IS_ERR(right))
3368 return PTR_ERR(right);
3369
3370 root_add_used(root, fs_info->nodesize);
3371
3372 if (split == 0) {
3373 if (mid <= slot) {
3374 btrfs_set_header_nritems(right, 0);
3375 insert_ptr(trans, path, &disk_key,
3376 right->start, path->slots[1] + 1, 1);
3377 btrfs_tree_unlock(path->nodes[0]);
3378 free_extent_buffer(path->nodes[0]);
3379 path->nodes[0] = right;
3380 path->slots[0] = 0;
3381 path->slots[1] += 1;
3382 } else {
3383 btrfs_set_header_nritems(right, 0);
3384 insert_ptr(trans, path, &disk_key,
3385 right->start, path->slots[1], 1);
3386 btrfs_tree_unlock(path->nodes[0]);
3387 free_extent_buffer(path->nodes[0]);
3388 path->nodes[0] = right;
3389 path->slots[0] = 0;
3390 if (path->slots[1] == 0)
3391 fixup_low_keys(path, &disk_key, 1);
3392 }
		/*
		 * We created a new empty leaf 'right' for the required
		 * ins_len; the caller will copy the new item into it and
		 * mark it dirty.
		 */
3398 return ret;
3399 }
3400
3401 copy_for_split(trans, path, l, right, slot, mid, nritems);
3402
3403 if (split == 2) {
3404 BUG_ON(num_doubles != 0);
3405 num_doubles++;
3406 goto again;
3407 }
3408
3409 return 0;
3410
3411push_for_double:
3412 push_for_double_split(trans, root, path, data_size);
3413 tried_avoid_double = 1;
3414 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
3415 return 0;
3416 goto again;
3417}
3418
3419static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3420 struct btrfs_root *root,
3421 struct btrfs_path *path, int ins_len)
3422{
3423 struct btrfs_key key;
3424 struct extent_buffer *leaf;
3425 struct btrfs_file_extent_item *fi;
3426 u64 extent_len = 0;
3427 u32 item_size;
3428 int ret;
3429
3430 leaf = path->nodes[0];
3431 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3432
3433 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3434 key.type != BTRFS_EXTENT_CSUM_KEY);
3435
3436 if (btrfs_leaf_free_space(leaf) >= ins_len)
3437 return 0;
3438
3439 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3440 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3441 fi = btrfs_item_ptr(leaf, path->slots[0],
3442 struct btrfs_file_extent_item);
3443 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3444 }
3445 btrfs_release_path(path);
3446
3447 path->keep_locks = 1;
3448 path->search_for_split = 1;
3449 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3450 path->search_for_split = 0;
3451 if (ret > 0)
3452 ret = -EAGAIN;
3453 if (ret < 0)
3454 goto err;
3455
3456 ret = -EAGAIN;
3457 leaf = path->nodes[0];
3458
3459 if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3460 goto err;
3461
	/* the leaf has changed, it now has room.  return now */
3463 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
3464 goto err;
3465
3466 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3467 fi = btrfs_item_ptr(leaf, path->slots[0],
3468 struct btrfs_file_extent_item);
3469 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3470 goto err;
3471 }
3472
3473 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3474 if (ret)
3475 goto err;
3476
3477 path->keep_locks = 0;
3478 btrfs_unlock_up_safe(path, 1);
3479 return 0;
3480err:
3481 path->keep_locks = 0;
3482 return ret;
3483}
3484
3485static noinline int split_item(struct btrfs_path *path,
3486 const struct btrfs_key *new_key,
3487 unsigned long split_offset)
3488{
3489 struct extent_buffer *leaf;
3490 struct btrfs_item *item;
3491 struct btrfs_item *new_item;
3492 int slot;
3493 char *buf;
3494 u32 nritems;
3495 u32 item_size;
3496 u32 orig_offset;
3497 struct btrfs_disk_key disk_key;
3498
3499 leaf = path->nodes[0];
3500 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
3501
3502 item = btrfs_item_nr(path->slots[0]);
3503 orig_offset = btrfs_item_offset(leaf, item);
3504 item_size = btrfs_item_size(leaf, item);
3505
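	/* keep a copy of the item's data; both halves are rewritten from it below */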
3506 buf = kmalloc(item_size, GFP_NOFS);
3507 if (!buf)
3508 return -ENOMEM;
3509
3510 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3511 path->slots[0]), item_size);
3512
3513 slot = path->slots[0] + 1;
3514 nritems = btrfs_header_nritems(leaf);
3515 if (slot != nritems) {
		/* shift the items */
3517 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3518 btrfs_item_nr_offset(slot),
3519 (nritems - slot) * sizeof(struct btrfs_item));
3520 }
3521
3522 btrfs_cpu_key_to_disk(&disk_key, new_key);
3523 btrfs_set_item_key(leaf, &disk_key, slot);
3524
3525 new_item = btrfs_item_nr(slot);
3526
3527 btrfs_set_item_offset(leaf, new_item, orig_offset);
3528 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3529
3530 btrfs_set_item_offset(leaf, item,
3531 orig_offset + item_size - split_offset);
3532 btrfs_set_item_size(leaf, item, split_offset);
3533
3534 btrfs_set_header_nritems(leaf, nritems + 1);
3535
	/* write the data for the start of the original item */
3537 write_extent_buffer(leaf, buf,
3538 btrfs_item_ptr_offset(leaf, path->slots[0]),
3539 split_offset);
3540
	/* write the data for the new item */
3542 write_extent_buffer(leaf, buf + split_offset,
3543 btrfs_item_ptr_offset(leaf, slot),
3544 item_size - split_offset);
3545 btrfs_mark_buffer_dirty(leaf);
3546
3547 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
3548 kfree(buf);
3549 return 0;
3550}

/*
 * This function splits a single item into two items,
 * giving 'new_key' to the new item and splitting the
 * old one at split_offset (from the start of the item).
 *
 * The path may be released by this operation.  After
 * the split, the path is pointing to the old item.  The
 * new item is going to be in the same node as the old one.
 *
 * Note, the item being split must be small enough to live alone on
 * a tree block with room for one extra struct btrfs_item.
 *
 * This allows us to split the item in place, keeping a lock on the
 * leaf the entire time.
 */
3567int btrfs_split_item(struct btrfs_trans_handle *trans,
3568 struct btrfs_root *root,
3569 struct btrfs_path *path,
3570 const struct btrfs_key *new_key,
3571 unsigned long split_offset)
3572{
3573 int ret;
3574 ret = setup_leaf_for_split(trans, root, path,
3575 sizeof(struct btrfs_item));
3576 if (ret)
3577 return ret;
3578
3579 ret = split_item(path, new_key, split_offset);
3580 return ret;
3581}

/*
 * This function duplicates an item, giving 'new_key' to the new item.
 * It guarantees both items live in the same tree leaf and the new item is
 * contiguous with the original item.
 *
 * This allows us to split a file extent in place, keeping a lock on the leaf
 * the entire time.
 */
3591int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3592 struct btrfs_root *root,
3593 struct btrfs_path *path,
3594 const struct btrfs_key *new_key)
3595{
3596 struct extent_buffer *leaf;
3597 int ret;
3598 u32 item_size;
3599
3600 leaf = path->nodes[0];
3601 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3602 ret = setup_leaf_for_split(trans, root, path,
3603 item_size + sizeof(struct btrfs_item));
3604 if (ret)
3605 return ret;
3606
3607 path->slots[0]++;
3608 setup_items_for_insert(root, path, new_key, &item_size, 1);
3609 leaf = path->nodes[0];
3610 memcpy_extent_buffer(leaf,
3611 btrfs_item_ptr_offset(leaf, path->slots[0]),
3612 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3613 item_size);
3614 return 0;
3615}

/*
 * make the item pointed to by the path smaller.  new_size indicates
 * how small to make it, and from_end tells us which end to shrink:
 * with from_end set the item keeps its start and loses bytes at the
 * end, otherwise bytes are removed from the front and the item's key
 * offset is bumped accordingly.
 */
3623void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
3624{
3625 int slot;
3626 struct extent_buffer *leaf;
3627 struct btrfs_item *item;
3628 u32 nritems;
3629 unsigned int data_end;
3630 unsigned int old_data_start;
3631 unsigned int old_size;
3632 unsigned int size_diff;
3633 int i;
3634 struct btrfs_map_token token;
3635
3636 leaf = path->nodes[0];
3637 slot = path->slots[0];
3638
3639 old_size = btrfs_item_size_nr(leaf, slot);
3640 if (old_size == new_size)
3641 return;
3642
3643 nritems = btrfs_header_nritems(leaf);
3644 data_end = leaf_data_end(leaf);
3645
3646 old_data_start = btrfs_item_offset_nr(leaf, slot);
3647
3648 size_diff = old_size - new_size;
3649
3650 BUG_ON(slot < 0);
3651 BUG_ON(slot >= nritems);
3652
	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
3657 btrfs_init_map_token(&token, leaf);
3658 for (i = slot; i < nritems; i++) {
3659 u32 ioff;
3660 item = btrfs_item_nr(i);
3661
3662 ioff = btrfs_token_item_offset(&token, item);
3663 btrfs_set_token_item_offset(&token, item, ioff + size_diff);
3664 }
3665
	/* shift the data */
3667 if (from_end) {
3668 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3669 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3670 data_end, old_data_start + new_size - data_end);
3671 } else {
3672 struct btrfs_disk_key disk_key;
3673 u64 offset;
3674
3675 btrfs_item_key(leaf, &disk_key, slot);
3676
3677 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3678 unsigned long ptr;
3679 struct btrfs_file_extent_item *fi;
3680
3681 fi = btrfs_item_ptr(leaf, slot,
3682 struct btrfs_file_extent_item);
3683 fi = (struct btrfs_file_extent_item *)(
3684 (unsigned long)fi - size_diff);
3685
3686 if (btrfs_file_extent_type(leaf, fi) ==
3687 BTRFS_FILE_EXTENT_INLINE) {
3688 ptr = btrfs_item_ptr_offset(leaf, slot);
3689 memmove_extent_buffer(leaf, ptr,
3690 (unsigned long)fi,
3691 BTRFS_FILE_EXTENT_INLINE_DATA_START);
3692 }
3693 }
3694
3695 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3696 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3697 data_end, old_data_start - data_end);
3698
3699 offset = btrfs_disk_key_offset(&disk_key);
3700 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3701 btrfs_set_item_key(leaf, &disk_key, slot);
3702 if (slot == 0)
3703 fixup_low_keys(path, &disk_key, 1);
3704 }
3705
3706 item = btrfs_item_nr(slot);
3707 btrfs_set_item_size(leaf, item, new_size);
3708 btrfs_mark_buffer_dirty(leaf);
3709
3710 if (btrfs_leaf_free_space(leaf) < 0) {
3711 btrfs_print_leaf(leaf);
3712 BUG();
3713 }
3714}
3715
/*
 * make the item pointed to by the path bigger, data_size is the added size.
 */
3719void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
3720{
3721 int slot;
3722 struct extent_buffer *leaf;
3723 struct btrfs_item *item;
3724 u32 nritems;
3725 unsigned int data_end;
3726 unsigned int old_data;
3727 unsigned int old_size;
3728 int i;
3729 struct btrfs_map_token token;
3730
3731 leaf = path->nodes[0];
3732
3733 nritems = btrfs_header_nritems(leaf);
3734 data_end = leaf_data_end(leaf);
3735
3736 if (btrfs_leaf_free_space(leaf) < data_size) {
3737 btrfs_print_leaf(leaf);
3738 BUG();
3739 }
3740 slot = path->slots[0];
3741 old_data = btrfs_item_end_nr(leaf, slot);
3742
3743 BUG_ON(slot < 0);
3744 if (slot >= nritems) {
3745 btrfs_print_leaf(leaf);
3746 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
3747 slot, nritems);
3748 BUG();
3749 }
3750
	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
3755 btrfs_init_map_token(&token, leaf);
3756 for (i = slot; i < nritems; i++) {
3757 u32 ioff;
3758 item = btrfs_item_nr(i);
3759
3760 ioff = btrfs_token_item_offset(&token, item);
3761 btrfs_set_token_item_offset(&token, item, ioff - data_size);
3762 }
3763
	/* shift the data */
3765 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3766 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
3767 data_end, old_data - data_end);
3768
3769 data_end = old_data;
3770 old_size = btrfs_item_size_nr(leaf, slot);
3771 item = btrfs_item_nr(slot);
3772 btrfs_set_item_size(leaf, item, old_size + data_size);
3773 btrfs_mark_buffer_dirty(leaf);
3774
3775 if (btrfs_leaf_free_space(leaf) < 0) {
3776 btrfs_print_leaf(leaf);
3777 BUG();
3778 }
3779}
3780
/**
 * setup_items_for_insert - Helper called before inserting one or more items
 * to a leaf. Main purpose is to save stack depth by doing the bulk of the work
 * in a function that doesn't call btrfs_search_slot
 *
 * @root:	root we are inserting items to
 * @path:	points to the leaf/slot where we are going to insert new items
 * @cpu_key:	array of keys for items to be inserted
 * @data_size:	size of the body of each item we are going to insert
 * @nr:		size of @cpu_key/@data_size arrays
 */
3792void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3793 const struct btrfs_key *cpu_key, u32 *data_size,
3794 int nr)
3795{
3796 struct btrfs_fs_info *fs_info = root->fs_info;
3797 struct btrfs_item *item;
3798 int i;
3799 u32 nritems;
3800 unsigned int data_end;
3801 struct btrfs_disk_key disk_key;
3802 struct extent_buffer *leaf;
3803 int slot;
3804 struct btrfs_map_token token;
3805 u32 total_size;
3806 u32 total_data = 0;
3807
3808 for (i = 0; i < nr; i++)
3809 total_data += data_size[i];
3810 total_size = total_data + (nr * sizeof(struct btrfs_item));
3811
3812 if (path->slots[0] == 0) {
3813 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3814 fixup_low_keys(path, &disk_key, 1);
3815 }
3816 btrfs_unlock_up_safe(path, 1);
3817
3818 leaf = path->nodes[0];
3819 slot = path->slots[0];
3820
3821 nritems = btrfs_header_nritems(leaf);
3822 data_end = leaf_data_end(leaf);
3823
3824 if (btrfs_leaf_free_space(leaf) < total_size) {
3825 btrfs_print_leaf(leaf);
3826 btrfs_crit(fs_info, "not enough freespace need %u have %d",
3827 total_size, btrfs_leaf_free_space(leaf));
3828 BUG();
3829 }
3830
3831 btrfs_init_map_token(&token, leaf);
3832 if (slot != nritems) {
3833 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3834
3835 if (old_data < data_end) {
3836 btrfs_print_leaf(leaf);
3837 btrfs_crit(fs_info,
3838 "item at slot %d with data offset %u beyond data end of leaf %u",
3839 slot, old_data, data_end);
3840 BUG();
3841 }
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
3846 for (i = slot; i < nritems; i++) {
3847 u32 ioff;
3848
3849 item = btrfs_item_nr(i);
3850 ioff = btrfs_token_item_offset(&token, item);
3851 btrfs_set_token_item_offset(&token, item,
3852 ioff - total_data);
3853 }
		/* shift the items */
3855 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3856 btrfs_item_nr_offset(slot),
3857 (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
3860 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3861 data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
3862 data_end, old_data - data_end);
3863 data_end = old_data;
3864 }
3865
	/* setup the item for the new data */
3867 for (i = 0; i < nr; i++) {
3868 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3869 btrfs_set_item_key(leaf, &disk_key, slot + i);
3870 item = btrfs_item_nr(slot + i);
3871 data_end -= data_size[i];
3872 btrfs_set_token_item_offset(&token, item, data_end);
3873 btrfs_set_token_item_size(&token, item, data_size[i]);
3874 }
3875
3876 btrfs_set_header_nritems(leaf, nritems + nr);
3877 btrfs_mark_buffer_dirty(leaf);
3878
3879 if (btrfs_leaf_free_space(leaf) < 0) {
3880 btrfs_print_leaf(leaf);
3881 BUG();
3882 }
3883}
3884
/*
 * Given a key and some data, insert items into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
3889int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3890 struct btrfs_root *root,
3891 struct btrfs_path *path,
3892 const struct btrfs_key *cpu_key, u32 *data_size,
3893 int nr)
3894{
3895 int ret = 0;
3896 int slot;
3897 int i;
3898 u32 total_size = 0;
3899 u32 total_data = 0;
3900
3901 for (i = 0; i < nr; i++)
3902 total_data += data_size[i];
3903
3904 total_size = total_data + (nr * sizeof(struct btrfs_item));
3905 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3906 if (ret == 0)
3907 return -EEXIST;
3908 if (ret < 0)
3909 return ret;
3910
3911 slot = path->slots[0];
3912 BUG_ON(slot < 0);
3913
3914 setup_items_for_insert(root, path, cpu_key, data_size, nr);
3915 return 0;
3916}
3917
/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
3922int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3923 const struct btrfs_key *cpu_key, void *data,
3924 u32 data_size)
3925{
3926 int ret = 0;
3927 struct btrfs_path *path;
3928 struct extent_buffer *leaf;
3929 unsigned long ptr;
3930
3931 path = btrfs_alloc_path();
3932 if (!path)
3933 return -ENOMEM;
3934 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3935 if (!ret) {
3936 leaf = path->nodes[0];
3937 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3938 write_extent_buffer(leaf, data, ptr, data_size);
3939 btrfs_mark_buffer_dirty(leaf);
3940 }
3941 btrfs_free_path(path);
3942 return ret;
3943}
3944
/*
 * delete the pointer from a given node.
 *
 * the tree should have been previously balanced so the deletion does not
 * empty a node.
 */
3951static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
3952 int level, int slot)
3953{
3954 struct extent_buffer *parent = path->nodes[level];
3955 u32 nritems;
3956 int ret;
3957
3958 nritems = btrfs_header_nritems(parent);
3959 if (slot != nritems - 1) {
3960 if (level) {
3961 ret = btrfs_tree_mod_log_insert_move(parent, slot,
3962 slot + 1, nritems - slot - 1);
3963 BUG_ON(ret < 0);
3964 }
3965 memmove_extent_buffer(parent,
3966 btrfs_node_key_ptr_offset(slot),
3967 btrfs_node_key_ptr_offset(slot + 1),
3968 sizeof(struct btrfs_key_ptr) *
3969 (nritems - slot - 1));
3970 } else if (level) {
3971 ret = btrfs_tree_mod_log_insert_key(parent, slot,
3972 BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
3973 BUG_ON(ret < 0);
3974 }
3975
3976 nritems--;
3977 btrfs_set_header_nritems(parent, nritems);
3978 if (nritems == 0 && parent == root->node) {
3979 BUG_ON(btrfs_header_level(root->node) != 1);
		/* just turn the root into a leaf and break */
3981 btrfs_set_header_level(root->node, 0);
3982 } else if (slot == 0) {
3983 struct btrfs_disk_key disk_key;
3984
3985 btrfs_node_key(parent, &disk_key, 0);
3986 fixup_low_keys(path, &disk_key, level + 1);
3987 }
3988 btrfs_mark_buffer_dirty(parent);
3989}

/*
 * a helper function to delete the leaf pointed to by path->slots[1] and
 * path->nodes[1].
 *
 * This deletes the pointer in path->nodes[1] and frees the leaf
 * block extent.  zero is returned if it all worked out, < 0 otherwise.
 *
 * The path must have already been setup for deleting the leaf, including
 * all the proper balancing.  path->nodes[1] must be locked.
 */
4001static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4002 struct btrfs_root *root,
4003 struct btrfs_path *path,
4004 struct extent_buffer *leaf)
4005{
4006 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4007 del_ptr(root, path, 1, path->slots[1]);

	/*
	 * The leaf is no longer referenced from the tree, so we can drop the
	 * locks on the upper nodes before freeing the block below.
	 */
4013 btrfs_unlock_up_safe(path, 0);
4014
4015 root_sub_used(root, leaf->len);
4016
4017 atomic_inc(&leaf->refs);
4018 btrfs_free_tree_block(trans, root, leaf, 0, 1);
4019 free_extent_buffer_stale(leaf);
4020}
4021
/*
 * delete the item at the leaf level in path.  If that empties
 * the leaf, remove it from the tree
 */
4025int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4026 struct btrfs_path *path, int slot, int nr)
4027{
4028 struct btrfs_fs_info *fs_info = root->fs_info;
4029 struct extent_buffer *leaf;
4030 struct btrfs_item *item;
4031 u32 last_off;
4032 u32 dsize = 0;
4033 int ret = 0;
4034 int wret;
4035 int i;
4036 u32 nritems;
4037
4038 leaf = path->nodes[0];
4039 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
4040
4041 for (i = 0; i < nr; i++)
4042 dsize += btrfs_item_size_nr(leaf, slot + i);
4043
4044 nritems = btrfs_header_nritems(leaf);
4045
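	/*
	 * If we are not deleting from the tail of the leaf, close the gap:
	 * move the remaining item data towards the end of the leaf by dsize
	 * bytes and slide the later item headers over the removed slots.
	 */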
4046 if (slot + nr != nritems) {
4047 int data_end = leaf_data_end(leaf);
4048 struct btrfs_map_token token;
4049
4050 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4051 data_end + dsize,
4052 BTRFS_LEAF_DATA_OFFSET + data_end,
4053 last_off - data_end);
4054
4055 btrfs_init_map_token(&token, leaf);
4056 for (i = slot + nr; i < nritems; i++) {
4057 u32 ioff;
4058
4059 item = btrfs_item_nr(i);
4060 ioff = btrfs_token_item_offset(&token, item);
4061 btrfs_set_token_item_offset(&token, item, ioff + dsize);
4062 }
4063
4064 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
4065 btrfs_item_nr_offset(slot + nr),
4066 sizeof(struct btrfs_item) *
4067 (nritems - slot - nr));
4068 }
4069 btrfs_set_header_nritems(leaf, nritems - nr);
4070 nritems -= nr;

	/* delete the leaf if we've emptied it */
4073 if (nritems == 0) {
4074 if (leaf == root->node) {
4075 btrfs_set_header_level(leaf, 0);
4076 } else {
4077 btrfs_clean_tree_block(leaf);
4078 btrfs_del_leaf(trans, root, path, leaf);
4079 }
4080 } else {
4081 int used = leaf_space_used(leaf, 0, nritems);
4082 if (slot == 0) {
4083 struct btrfs_disk_key disk_key;
4084
4085 btrfs_item_key(leaf, &disk_key, 0);
4086 fixup_low_keys(path, &disk_key, 1);
4087 }

		/* delete the leaf if it is mostly empty */
4090 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
4095 slot = path->slots[1];
4096 atomic_inc(&leaf->refs);
4097
4098 wret = push_leaf_left(trans, root, path, 1, 1,
4099 1, (u32)-1);
4100 if (wret < 0 && wret != -ENOSPC)
4101 ret = wret;
4102
4103 if (path->nodes[0] == leaf &&
4104 btrfs_header_nritems(leaf)) {
4105 wret = push_leaf_right(trans, root, path, 1,
4106 1, 1, 0);
4107 if (wret < 0 && wret != -ENOSPC)
4108 ret = wret;
4109 }
4110
4111 if (btrfs_header_nritems(leaf) == 0) {
4112 path->slots[1] = slot;
4113 btrfs_del_leaf(trans, root, path, leaf);
4114 free_extent_buffer(leaf);
4115 ret = 0;
4116 } else {
				/* if we're still in the path, make sure
				 * we're dirty.  Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
4122 if (path->nodes[0] == leaf)
4123 btrfs_mark_buffer_dirty(leaf);
4124 free_extent_buffer(leaf);
4125 }
4126 } else {
4127 btrfs_mark_buffer_dirty(leaf);
4128 }
4129 }
4130 return ret;
4131}

/*
 * search the tree again to find a leaf with lesser keys.
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 *
 * This may release the path, and so you may lose any locks held at the
 * time you call it.
 */
4141int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4142{
4143 struct btrfs_key key;
4144 struct btrfs_disk_key found_key;
4145 int ret;
4146
4147 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4148
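	/*
	 * Compute the largest possible key that sorts strictly before the
	 * first key of the current leaf, then search for it from the root.
	 */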
4149 if (key.offset > 0) {
4150 key.offset--;
4151 } else if (key.type > 0) {
4152 key.type--;
4153 key.offset = (u64)-1;
4154 } else if (key.objectid > 0) {
4155 key.objectid--;
4156 key.type = (u8)-1;
4157 key.offset = (u64)-1;
4158 } else {
4159 return 1;
4160 }
4161
4162 btrfs_release_path(path);
4163 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4164 if (ret < 0)
4165 return ret;
4166 btrfs_item_key(path->nodes[0], &found_key, 0);
4167 ret = comp_keys(&found_key, &key);
	/*
	 * We might have had an item with the previous key in the tree right
	 * before we released our path. And after we released our path, that
	 * item might have been pushed to the first slot (0) of the leaf we
	 * were holding due to a tree balance. Alternatively, an item with the
	 * previous key can exist as the only element of a leaf (big fat item).
	 * Therefore account for these 2 cases, so that our callers (like
	 * btrfs_previous_item) don't miss an existing item with a key matching
	 * the previous key we computed above.
	 */
4178 if (ret <= 0)
4179 return 0;
4180 return 1;
4181}
4182
/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that have a minimum transaction id.
 * This is used by the btree defrag code, and tree logging.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through.  Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
4202int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
4203 struct btrfs_path *path,
4204 u64 min_trans)
4205{
4206 struct extent_buffer *cur;
4207 struct btrfs_key found_key;
4208 int slot;
4209 int sret;
4210 u32 nritems;
4211 int level;
4212 int ret = 1;
4213 int keep_locks = path->keep_locks;
4214
4215 path->keep_locks = 1;
4216again:
4217 cur = btrfs_read_lock_root_node(root);
4218 level = btrfs_header_level(cur);
4219 WARN_ON(path->nodes[level]);
4220 path->nodes[level] = cur;
4221 path->locks[level] = BTRFS_READ_LOCK;
4222
4223 if (btrfs_header_generation(cur) < min_trans) {
4224 ret = 1;
4225 goto out;
4226 }
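	/*
	 * Walk down the tree: at each level binary search for min_key, then
	 * skip over any child pointers whose generation is older than
	 * min_trans.
	 */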
4227 while (1) {
4228 nritems = btrfs_header_nritems(cur);
4229 level = btrfs_header_level(cur);
4230 sret = btrfs_bin_search(cur, min_key, &slot);
4231 if (sret < 0) {
4232 ret = sret;
4233 goto out;
4234 }

		/* at the lowest level, we're done, setup the path and exit */
4237 if (level == path->lowest_level) {
4238 if (slot >= nritems)
4239 goto find_next_key;
4240 ret = 0;
4241 path->slots[level] = slot;
4242 btrfs_item_key_to_cpu(cur, &found_key, slot);
4243 goto out;
4244 }
4245 if (sret && slot > 0)
4246 slot--;
		/*
		 * check this node pointer against the min_trans parameters.
		 * If it is too old, skip to the next one.
		 */
4251 while (slot < nritems) {
4252 u64 gen;
4253
4254 gen = btrfs_node_ptr_generation(cur, slot);
4255 if (gen < min_trans) {
4256 slot++;
4257 continue;
4258 }
4259 break;
4260 }
4261find_next_key:
		/*
		 * we didn't find a candidate key in this node, search forward
		 * in the tree for one
		 */
4266 if (slot >= nritems) {
4267 path->slots[level] = slot;
4268 sret = btrfs_find_next_key(root, path, min_key, level,
4269 min_trans);
4270 if (sret == 0) {
4271 btrfs_release_path(path);
4272 goto again;
4273 } else {
4274 goto out;
4275 }
4276 }
4277
4278 btrfs_node_key_to_cpu(cur, &found_key, slot);
4279 path->slots[level] = slot;
4280 if (level == path->lowest_level) {
4281 ret = 0;
4282 goto out;
4283 }
4284 cur = btrfs_read_node_slot(cur, slot);
4285 if (IS_ERR(cur)) {
4286 ret = PTR_ERR(cur);
4287 goto out;
4288 }
4289
4290 btrfs_tree_read_lock(cur);
4291
4292 path->locks[level - 1] = BTRFS_READ_LOCK;
4293 path->nodes[level - 1] = cur;
4294 unlock_up(path, level, 1, 0, NULL);
4295 }
4296out:
4297 path->keep_locks = keep_locks;
4298 if (ret == 0) {
4299 btrfs_unlock_up_safe(path, path->lowest_level + 1);
4300 memcpy(min_key, &found_key, sizeof(found_key));
4301 }
4302 return ret;
4303}
4304
/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the min_trans parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree.
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
4316int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4317 struct btrfs_key *key, int level, u64 min_trans)
4318{
4319 int slot;
4320 struct extent_buffer *c;
4321
4322 WARN_ON(!path->keep_locks && !path->skip_locking);
4323 while (level < BTRFS_MAX_LEVEL) {
4324 if (!path->nodes[level])
4325 return 1;
4326
4327 slot = path->slots[level] + 1;
4328 c = path->nodes[level];
4329next:
4330 if (slot >= btrfs_header_nritems(c)) {
4331 int ret;
4332 int orig_lowest;
4333 struct btrfs_key cur_key;
4334 if (level + 1 >= BTRFS_MAX_LEVEL ||
4335 !path->nodes[level + 1])
4336 return 1;
4337
4338 if (path->locks[level + 1] || path->skip_locking) {
4339 level++;
4340 continue;
4341 }
4342
4343 slot = btrfs_header_nritems(c) - 1;
4344 if (level == 0)
4345 btrfs_item_key_to_cpu(c, &cur_key, slot);
4346 else
4347 btrfs_node_key_to_cpu(c, &cur_key, slot);
4348
4349 orig_lowest = path->lowest_level;
4350 btrfs_release_path(path);
4351 path->lowest_level = level;
4352 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4353 0, 0);
4354 path->lowest_level = orig_lowest;
4355 if (ret < 0)
4356 return ret;
4357
4358 c = path->nodes[level];
4359 slot = path->slots[level];
4360 if (ret == 0)
4361 slot++;
4362 goto next;
4363 }
4364
4365 if (level == 0)
4366 btrfs_item_key_to_cpu(c, key, slot);
4367 else {
4368 u64 gen = btrfs_node_ptr_generation(c, slot);
4369
4370 if (gen < min_trans) {
4371 slot++;
4372 goto next;
4373 }
4374 btrfs_node_key_to_cpu(c, key, slot);
4375 }
4376 return 0;
4377 }
4378 return 1;
4379}
4380
4381int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
4382 u64 time_seq)
4383{
4384 int slot;
4385 int level;
4386 struct extent_buffer *c;
4387 struct extent_buffer *next;
4388 struct btrfs_key key;
4389 u32 nritems;
4390 int ret;
4391 int i;
4392
4393 nritems = btrfs_header_nritems(path->nodes[0]);
4394 if (nritems == 0)
4395 return 1;
4396
4397 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4398again:
4399 level = 1;
4400 next = NULL;
4401 btrfs_release_path(path);
4402
4403 path->keep_locks = 1;
4404
4405 if (time_seq)
4406 ret = btrfs_search_old_slot(root, &key, path, time_seq);
4407 else
4408 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4409 path->keep_locks = 0;
4410
4411 if (ret < 0)
4412 return ret;
4413
4414 nritems = btrfs_header_nritems(path->nodes[0]);

	/*
	 * by releasing the path above we dropped all our locks.  A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block.  So, check again here and
	 * advance the path if there are now more items available.
	 */
4421 if (nritems > 0 && path->slots[0] < nritems - 1) {
4422 if (ret == 0)
4423 path->slots[0]++;
4424 ret = 0;
4425 goto done;
4426 }

	/*
	 * So the above check misses one case:
	 * - after releasing the path above, someone has removed the item that
	 *   used to be at the very end of the block, and balance between leafs
	 *   gets another one with bigger key.offset to replace it.
	 *
	 * This one should be returned as well, or we can get leaf corruption
	 * later (e.g. when dropping file extents).
	 *
	 * A bit more explanation about this check: with ret > 0 the key isn't
	 * found, the path points to the slot where it should be inserted, so
	 * the path->slots[0] item must be the bigger one.
	 */
4441 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
4442 ret = 0;
4443 goto done;
4444 }
4445
4446 while (level < BTRFS_MAX_LEVEL) {
4447 if (!path->nodes[level]) {
4448 ret = 1;
4449 goto done;
4450 }
4451
4452 slot = path->slots[level] + 1;
4453 c = path->nodes[level];
4454 if (slot >= btrfs_header_nritems(c)) {
4455 level++;
4456 if (level == BTRFS_MAX_LEVEL) {
4457 ret = 1;
4458 goto done;
4459 }
4460 continue;
4461 }

		/*
		 * Our current level is where we're going to start from, and to
		 * make sure lockdep doesn't complain we need to drop our locks
		 * and nodes from 0 to our current level.
		 */
4469 for (i = 0; i < level; i++) {
4470 if (path->locks[level]) {
4471 btrfs_tree_read_unlock(path->nodes[i]);
4472 path->locks[i] = 0;
4473 }
4474 free_extent_buffer(path->nodes[i]);
4475 path->nodes[i] = NULL;
4476 }
4477
4478 next = c;
4479 ret = read_block_for_search(root, path, &next, level,
4480 slot, &key);
4481 if (ret == -EAGAIN)
4482 goto again;
4483
4484 if (ret < 0) {
4485 btrfs_release_path(path);
4486 goto done;
4487 }
4488
4489 if (!path->skip_locking) {
4490 ret = btrfs_try_tree_read_lock(next);
4491 if (!ret && time_seq) {
				/*
				 * If we don't get the lock, we may be racing
				 * with push_leaf_left, holding that lock while
				 * itself waiting for the leaf we've currently
				 * locked. To solve this situation, we give up
				 * on our lock and cycle.
				 */
4499 free_extent_buffer(next);
4500 btrfs_release_path(path);
4501 cond_resched();
4502 goto again;
4503 }
4504 if (!ret)
4505 btrfs_tree_read_lock(next);
4506 }
4507 break;
4508 }
4509 path->slots[level] = slot;
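	/*
	 * We found the next node pointer at 'level'; now walk down its
	 * left-most path to reach the first leaf of that subtree.
	 */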
4510 while (1) {
4511 level--;
4512 path->nodes[level] = next;
4513 path->slots[level] = 0;
4514 if (!path->skip_locking)
4515 path->locks[level] = BTRFS_READ_LOCK;
4516 if (!level)
4517 break;
4518
4519 ret = read_block_for_search(root, path, &next, level,
4520 0, &key);
4521 if (ret == -EAGAIN)
4522 goto again;
4523
4524 if (ret < 0) {
4525 btrfs_release_path(path);
4526 goto done;
4527 }
4528
4529 if (!path->skip_locking)
4530 btrfs_tree_read_lock(next);
4531 }
4532 ret = 0;
4533done:
4534 unlock_up(path, 0, 1, 0, NULL);
4535
4536 return ret;
4537}
4538
/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
4545int btrfs_previous_item(struct btrfs_root *root,
4546 struct btrfs_path *path, u64 min_objectid,
4547 int type)
4548{
4549 struct btrfs_key found_key;
4550 struct extent_buffer *leaf;
4551 u32 nritems;
4552 int ret;
4553
4554 while (1) {
4555 if (path->slots[0] == 0) {
4556 ret = btrfs_prev_leaf(root, path);
4557 if (ret != 0)
4558 return ret;
4559 } else {
4560 path->slots[0]--;
4561 }
4562 leaf = path->nodes[0];
4563 nritems = btrfs_header_nritems(leaf);
4564 if (nritems == 0)
4565 return 1;
4566 if (path->slots[0] == nritems)
4567 path->slots[0]--;
4568
4569 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4570 if (found_key.objectid < min_objectid)
4571 break;
4572 if (found_key.type == type)
4573 return 0;
4574 if (found_key.objectid == min_objectid &&
4575 found_key.type < type)
4576 break;
4577 }
4578 return 1;
4579}
4580
/*
 * search in extent tree to find a previous Metadata/Data extent item with
 * min objectid.
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
4587int btrfs_previous_extent_item(struct btrfs_root *root,
4588 struct btrfs_path *path, u64 min_objectid)
4589{
4590 struct btrfs_key found_key;
4591 struct extent_buffer *leaf;
4592 u32 nritems;
4593 int ret;
4594
4595 while (1) {
4596 if (path->slots[0] == 0) {
4597 ret = btrfs_prev_leaf(root, path);
4598 if (ret != 0)
4599 return ret;
4600 } else {
4601 path->slots[0]--;
4602 }
4603 leaf = path->nodes[0];
4604 nritems = btrfs_header_nritems(leaf);
4605 if (nritems == 0)
4606 return 1;
4607 if (path->slots[0] == nritems)
4608 path->slots[0]--;
4609
4610 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4611 if (found_key.objectid < min_objectid)
4612 break;
4613 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4614 found_key.type == BTRFS_METADATA_ITEM_KEY)
4615 return 0;
4616 if (found_key.objectid == min_objectid &&
4617 found_key.type < BTRFS_EXTENT_ITEM_KEY)
4618 break;
4619 }
4620 return 1;
4621}
4622