1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/slab.h>
8#include <linux/rbtree.h>
9#include <linux/mm.h>
10#include <linux/error-injection.h>
11#include "ctree.h"
12#include "disk-io.h"
13#include "transaction.h"
14#include "print-tree.h"
15#include "locking.h"
16#include "volumes.h"
17#include "qgroup.h"
18#include "tree-mod-log.h"
19
20static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
21 *root, struct btrfs_path *path, int level);
22static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
23 const struct btrfs_key *ins_key, struct btrfs_path *path,
24 int data_size, int extend);
25static int push_node_left(struct btrfs_trans_handle *trans,
26 struct extent_buffer *dst,
27 struct extent_buffer *src, int empty);
28static int balance_node_right(struct btrfs_trans_handle *trans,
29 struct extent_buffer *dst_buf,
30 struct extent_buffer *src_buf);
31static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
32 int level, int slot);
33
34static const struct btrfs_csums {
35 u16 size;
36 const char name[10];
37 const char driver[12];
38} btrfs_csums[] = {
39 [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
40 [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
41 [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
42 [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
43 .driver = "blake2b-256" },
44};
45
46int btrfs_super_csum_size(const struct btrfs_super_block *s)
47{
48 u16 t = btrfs_super_csum_type(s);
49
50
51
52 return btrfs_csums[t].size;
53}
54
55const char *btrfs_super_csum_name(u16 csum_type)
56{
57
58 return btrfs_csums[csum_type].name;
59}
60
61
62
63
64
65const char *btrfs_super_csum_driver(u16 csum_type)
66{
67
68 return btrfs_csums[csum_type].driver[0] ?
69 btrfs_csums[csum_type].driver :
70 btrfs_csums[csum_type].name;
71}
72
73size_t __attribute_const__ btrfs_get_num_csums(void)
74{
75 return ARRAY_SIZE(btrfs_csums);
76}
77
78struct btrfs_path *btrfs_alloc_path(void)
79{
80 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
81}
82
83
84void btrfs_free_path(struct btrfs_path *p)
85{
86 if (!p)
87 return;
88 btrfs_release_path(p);
89 kmem_cache_free(btrfs_path_cachep, p);
90}
91
92
93
94
95
96
97
98noinline void btrfs_release_path(struct btrfs_path *p)
99{
100 int i;
101
102 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
103 p->slots[i] = 0;
104 if (!p->nodes[i])
105 continue;
106 if (p->locks[i]) {
107 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
108 p->locks[i] = 0;
109 }
110 free_extent_buffer(p->nodes[i]);
111 p->nodes[i] = NULL;
112 }
113}
114
115
116
117
118
119
120
121
122
123
124
125struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
126{
127 struct extent_buffer *eb;
128
129 while (1) {
130 rcu_read_lock();
131 eb = rcu_dereference(root->node);
132
133
134
135
136
137
138
139 if (atomic_inc_not_zero(&eb->refs)) {
140 rcu_read_unlock();
141 break;
142 }
143 rcu_read_unlock();
144 synchronize_rcu();
145 }
146 return eb;
147}
148
149
150
151
152
153
154static void add_root_to_dirty_list(struct btrfs_root *root)
155{
156 struct btrfs_fs_info *fs_info = root->fs_info;
157
158 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
159 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
160 return;
161
162 spin_lock(&fs_info->trans_lock);
163 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
164
165 if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
166 list_move_tail(&root->dirty_list,
167 &fs_info->dirty_cowonly_roots);
168 else
169 list_move(&root->dirty_list,
170 &fs_info->dirty_cowonly_roots);
171 }
172 spin_unlock(&fs_info->trans_lock);
173}
174
175
176
177
178
179
180int btrfs_copy_root(struct btrfs_trans_handle *trans,
181 struct btrfs_root *root,
182 struct extent_buffer *buf,
183 struct extent_buffer **cow_ret, u64 new_root_objectid)
184{
185 struct btrfs_fs_info *fs_info = root->fs_info;
186 struct extent_buffer *cow;
187 int ret = 0;
188 int level;
189 struct btrfs_disk_key disk_key;
190
191 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
192 trans->transid != fs_info->running_transaction->transid);
193 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
194 trans->transid != root->last_trans);
195
196 level = btrfs_header_level(buf);
197 if (level == 0)
198 btrfs_item_key(buf, &disk_key, 0);
199 else
200 btrfs_node_key(buf, &disk_key, 0);
201
202 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
203 &disk_key, level, buf->start, 0,
204 BTRFS_NESTING_NEW_ROOT);
205 if (IS_ERR(cow))
206 return PTR_ERR(cow);
207
208 copy_extent_buffer_full(cow, buf);
209 btrfs_set_header_bytenr(cow, cow->start);
210 btrfs_set_header_generation(cow, trans->transid);
211 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
212 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
213 BTRFS_HEADER_FLAG_RELOC);
214 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
215 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
216 else
217 btrfs_set_header_owner(cow, new_root_objectid);
218
219 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
220
221 WARN_ON(btrfs_header_generation(buf) > trans->transid);
222 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
223 ret = btrfs_inc_ref(trans, root, cow, 1);
224 else
225 ret = btrfs_inc_ref(trans, root, cow, 0);
226 if (ret) {
227 btrfs_tree_unlock(cow);
228 free_extent_buffer(cow);
229 btrfs_abort_transaction(trans, ret);
230 return ret;
231 }
232
233 btrfs_mark_buffer_dirty(cow);
234 *cow_ret = cow;
235 return 0;
236}
237
238
239
240
241int btrfs_block_can_be_shared(struct btrfs_root *root,
242 struct extent_buffer *buf)
243{
244
245
246
247
248
249 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
250 buf != root->node && buf != root->commit_root &&
251 (btrfs_header_generation(buf) <=
252 btrfs_root_last_snapshot(&root->root_item) ||
253 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
254 return 1;
255
256 return 0;
257}
258
259static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
260 struct btrfs_root *root,
261 struct extent_buffer *buf,
262 struct extent_buffer *cow,
263 int *last_ref)
264{
265 struct btrfs_fs_info *fs_info = root->fs_info;
266 u64 refs;
267 u64 owner;
268 u64 flags;
269 u64 new_flags = 0;
270 int ret;
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289 if (btrfs_block_can_be_shared(root, buf)) {
290 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
291 btrfs_header_level(buf), 1,
292 &refs, &flags);
293 if (ret)
294 return ret;
295 if (refs == 0) {
296 ret = -EROFS;
297 btrfs_handle_fs_error(fs_info, ret, NULL);
298 return ret;
299 }
300 } else {
301 refs = 1;
302 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
303 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
304 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
305 else
306 flags = 0;
307 }
308
309 owner = btrfs_header_owner(buf);
310 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
311 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
312
313 if (refs > 1) {
314 if ((owner == root->root_key.objectid ||
315 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
316 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
317 ret = btrfs_inc_ref(trans, root, buf, 1);
318 if (ret)
319 return ret;
320
321 if (root->root_key.objectid ==
322 BTRFS_TREE_RELOC_OBJECTID) {
323 ret = btrfs_dec_ref(trans, root, buf, 0);
324 if (ret)
325 return ret;
326 ret = btrfs_inc_ref(trans, root, cow, 1);
327 if (ret)
328 return ret;
329 }
330 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
331 } else {
332
333 if (root->root_key.objectid ==
334 BTRFS_TREE_RELOC_OBJECTID)
335 ret = btrfs_inc_ref(trans, root, cow, 1);
336 else
337 ret = btrfs_inc_ref(trans, root, cow, 0);
338 if (ret)
339 return ret;
340 }
341 if (new_flags != 0) {
342 int level = btrfs_header_level(buf);
343
344 ret = btrfs_set_disk_extent_flags(trans, buf,
345 new_flags, level, 0);
346 if (ret)
347 return ret;
348 }
349 } else {
350 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
351 if (root->root_key.objectid ==
352 BTRFS_TREE_RELOC_OBJECTID)
353 ret = btrfs_inc_ref(trans, root, cow, 1);
354 else
355 ret = btrfs_inc_ref(trans, root, cow, 0);
356 if (ret)
357 return ret;
358 ret = btrfs_dec_ref(trans, root, buf, 1);
359 if (ret)
360 return ret;
361 }
362 btrfs_clean_tree_block(buf);
363 *last_ref = 1;
364 }
365 return 0;
366}
367
368
369
370
371
372
373
374
375
376
377
378
379
380static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
381 struct btrfs_root *root,
382 struct extent_buffer *buf,
383 struct extent_buffer *parent, int parent_slot,
384 struct extent_buffer **cow_ret,
385 u64 search_start, u64 empty_size,
386 enum btrfs_lock_nesting nest)
387{
388 struct btrfs_fs_info *fs_info = root->fs_info;
389 struct btrfs_disk_key disk_key;
390 struct extent_buffer *cow;
391 int level, ret;
392 int last_ref = 0;
393 int unlock_orig = 0;
394 u64 parent_start = 0;
395
396 if (*cow_ret == buf)
397 unlock_orig = 1;
398
399 btrfs_assert_tree_write_locked(buf);
400
401 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
402 trans->transid != fs_info->running_transaction->transid);
403 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
404 trans->transid != root->last_trans);
405
406 level = btrfs_header_level(buf);
407
408 if (level == 0)
409 btrfs_item_key(buf, &disk_key, 0);
410 else
411 btrfs_node_key(buf, &disk_key, 0);
412
413 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
414 parent_start = parent->start;
415
416 cow = btrfs_alloc_tree_block(trans, root, parent_start,
417 root->root_key.objectid, &disk_key, level,
418 search_start, empty_size, nest);
419 if (IS_ERR(cow))
420 return PTR_ERR(cow);
421
422
423
424 copy_extent_buffer_full(cow, buf);
425 btrfs_set_header_bytenr(cow, cow->start);
426 btrfs_set_header_generation(cow, trans->transid);
427 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
428 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
429 BTRFS_HEADER_FLAG_RELOC);
430 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
431 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
432 else
433 btrfs_set_header_owner(cow, root->root_key.objectid);
434
435 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
436
437 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
438 if (ret) {
439 btrfs_tree_unlock(cow);
440 free_extent_buffer(cow);
441 btrfs_abort_transaction(trans, ret);
442 return ret;
443 }
444
445 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
446 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
447 if (ret) {
448 btrfs_tree_unlock(cow);
449 free_extent_buffer(cow);
450 btrfs_abort_transaction(trans, ret);
451 return ret;
452 }
453 }
454
455 if (buf == root->node) {
456 WARN_ON(parent && parent != buf);
457 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
458 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
459 parent_start = buf->start;
460
461 atomic_inc(&cow->refs);
462 ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
463 BUG_ON(ret < 0);
464 rcu_assign_pointer(root->node, cow);
465
466 btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
467 parent_start, last_ref);
468 free_extent_buffer(buf);
469 add_root_to_dirty_list(root);
470 } else {
471 WARN_ON(trans->transid != btrfs_header_generation(parent));
472 btrfs_tree_mod_log_insert_key(parent, parent_slot,
473 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
474 btrfs_set_node_blockptr(parent, parent_slot,
475 cow->start);
476 btrfs_set_node_ptr_generation(parent, parent_slot,
477 trans->transid);
478 btrfs_mark_buffer_dirty(parent);
479 if (last_ref) {
480 ret = btrfs_tree_mod_log_free_eb(buf);
481 if (ret) {
482 btrfs_tree_unlock(cow);
483 free_extent_buffer(cow);
484 btrfs_abort_transaction(trans, ret);
485 return ret;
486 }
487 }
488 btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
489 parent_start, last_ref);
490 }
491 if (unlock_orig)
492 btrfs_tree_unlock(buf);
493 free_extent_buffer_stale(buf);
494 btrfs_mark_buffer_dirty(cow);
495 *cow_ret = cow;
496 return 0;
497}
498
499static inline int should_cow_block(struct btrfs_trans_handle *trans,
500 struct btrfs_root *root,
501 struct extent_buffer *buf)
502{
503 if (btrfs_is_testing(root->fs_info))
504 return 0;
505
506
507 smp_mb__before_atomic();
508
509
510
511
512
513
514
515
516
517
518
519
520 if (btrfs_header_generation(buf) == trans->transid &&
521 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
522 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
523 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
524 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
525 return 0;
526 return 1;
527}
528
529
530
531
532
533
534noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
535 struct btrfs_root *root, struct extent_buffer *buf,
536 struct extent_buffer *parent, int parent_slot,
537 struct extent_buffer **cow_ret,
538 enum btrfs_lock_nesting nest)
539{
540 struct btrfs_fs_info *fs_info = root->fs_info;
541 u64 search_start;
542 int ret;
543
544 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
545 btrfs_err(fs_info,
546 "COW'ing blocks on a fs root that's being dropped");
547
548 if (trans->transaction != fs_info->running_transaction)
549 WARN(1, KERN_CRIT "trans %llu running %llu\n",
550 trans->transid,
551 fs_info->running_transaction->transid);
552
553 if (trans->transid != fs_info->generation)
554 WARN(1, KERN_CRIT "trans %llu running %llu\n",
555 trans->transid, fs_info->generation);
556
557 if (!should_cow_block(trans, root, buf)) {
558 *cow_ret = buf;
559 return 0;
560 }
561
562 search_start = buf->start & ~((u64)SZ_1G - 1);
563
564
565
566
567
568
569
570 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
571 ret = __btrfs_cow_block(trans, root, buf, parent,
572 parent_slot, cow_ret, search_start, 0, nest);
573
574 trace_btrfs_cow_block(root, buf, *cow_ret);
575
576 return ret;
577}
578ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
579
580
581
582
583
584static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
585{
586 if (blocknr < other && other - (blocknr + blocksize) < 32768)
587 return 1;
588 if (blocknr > other && blocknr - (other + blocksize) < 32768)
589 return 1;
590 return 0;
591}
592
593#ifdef __LITTLE_ENDIAN
594
595
596
597
598
599static int comp_keys(const struct btrfs_disk_key *disk_key,
600 const struct btrfs_key *k2)
601{
602 const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
603
604 return btrfs_comp_cpu_keys(k1, k2);
605}
606
607#else
608
609
610
611
612static int comp_keys(const struct btrfs_disk_key *disk,
613 const struct btrfs_key *k2)
614{
615 struct btrfs_key k1;
616
617 btrfs_disk_key_to_cpu(&k1, disk);
618
619 return btrfs_comp_cpu_keys(&k1, k2);
620}
621#endif
622
623
624
625
626int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
627{
628 if (k1->objectid > k2->objectid)
629 return 1;
630 if (k1->objectid < k2->objectid)
631 return -1;
632 if (k1->type > k2->type)
633 return 1;
634 if (k1->type < k2->type)
635 return -1;
636 if (k1->offset > k2->offset)
637 return 1;
638 if (k1->offset < k2->offset)
639 return -1;
640 return 0;
641}
642
643
644
645
646
647
648int btrfs_realloc_node(struct btrfs_trans_handle *trans,
649 struct btrfs_root *root, struct extent_buffer *parent,
650 int start_slot, u64 *last_ret,
651 struct btrfs_key *progress)
652{
653 struct btrfs_fs_info *fs_info = root->fs_info;
654 struct extent_buffer *cur;
655 u64 blocknr;
656 u64 search_start = *last_ret;
657 u64 last_block = 0;
658 u64 other;
659 u32 parent_nritems;
660 int end_slot;
661 int i;
662 int err = 0;
663 u32 blocksize;
664 int progress_passed = 0;
665 struct btrfs_disk_key disk_key;
666
667 WARN_ON(trans->transaction != fs_info->running_transaction);
668 WARN_ON(trans->transid != fs_info->generation);
669
670 parent_nritems = btrfs_header_nritems(parent);
671 blocksize = fs_info->nodesize;
672 end_slot = parent_nritems - 1;
673
674 if (parent_nritems <= 1)
675 return 0;
676
677 for (i = start_slot; i <= end_slot; i++) {
678 int close = 1;
679
680 btrfs_node_key(parent, &disk_key, i);
681 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
682 continue;
683
684 progress_passed = 1;
685 blocknr = btrfs_node_blockptr(parent, i);
686 if (last_block == 0)
687 last_block = blocknr;
688
689 if (i > 0) {
690 other = btrfs_node_blockptr(parent, i - 1);
691 close = close_blocks(blocknr, other, blocksize);
692 }
693 if (!close && i < end_slot) {
694 other = btrfs_node_blockptr(parent, i + 1);
695 close = close_blocks(blocknr, other, blocksize);
696 }
697 if (close) {
698 last_block = blocknr;
699 continue;
700 }
701
702 cur = btrfs_read_node_slot(parent, i);
703 if (IS_ERR(cur))
704 return PTR_ERR(cur);
705 if (search_start == 0)
706 search_start = last_block;
707
708 btrfs_tree_lock(cur);
709 err = __btrfs_cow_block(trans, root, cur, parent, i,
710 &cur, search_start,
711 min(16 * blocksize,
712 (end_slot - i) * blocksize),
713 BTRFS_NESTING_COW);
714 if (err) {
715 btrfs_tree_unlock(cur);
716 free_extent_buffer(cur);
717 break;
718 }
719 search_start = cur->start;
720 last_block = cur->start;
721 *last_ret = search_start;
722 btrfs_tree_unlock(cur);
723 free_extent_buffer(cur);
724 }
725 return err;
726}
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741static noinline int generic_bin_search(struct extent_buffer *eb, int low,
742 const struct btrfs_key *key, int *slot)
743{
744 unsigned long p;
745 int item_size;
746 int high = btrfs_header_nritems(eb);
747 int ret;
748 const int key_size = sizeof(struct btrfs_disk_key);
749
750 if (low > high) {
751 btrfs_err(eb->fs_info,
752 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
753 __func__, low, high, eb->start,
754 btrfs_header_owner(eb), btrfs_header_level(eb));
755 return -EINVAL;
756 }
757
758 if (btrfs_header_level(eb) == 0) {
759 p = offsetof(struct btrfs_leaf, items);
760 item_size = sizeof(struct btrfs_item);
761 } else {
762 p = offsetof(struct btrfs_node, ptrs);
763 item_size = sizeof(struct btrfs_key_ptr);
764 }
765
766 while (low < high) {
767 unsigned long oip;
768 unsigned long offset;
769 struct btrfs_disk_key *tmp;
770 struct btrfs_disk_key unaligned;
771 int mid;
772
773 mid = (low + high) / 2;
774 offset = p + mid * item_size;
775 oip = offset_in_page(offset);
776
777 if (oip + key_size <= PAGE_SIZE) {
778 const unsigned long idx = get_eb_page_index(offset);
779 char *kaddr = page_address(eb->pages[idx]);
780
781 oip = get_eb_offset_in_page(eb, offset);
782 tmp = (struct btrfs_disk_key *)(kaddr + oip);
783 } else {
784 read_extent_buffer(eb, &unaligned, offset, key_size);
785 tmp = &unaligned;
786 }
787
788 ret = comp_keys(tmp, key);
789
790 if (ret < 0)
791 low = mid + 1;
792 else if (ret > 0)
793 high = mid;
794 else {
795 *slot = mid;
796 return 0;
797 }
798 }
799 *slot = low;
800 return 1;
801}
802
803
804
805
806
807int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
808 int *slot)
809{
810 return generic_bin_search(eb, 0, key, slot);
811}
812
813static void root_add_used(struct btrfs_root *root, u32 size)
814{
815 spin_lock(&root->accounting_lock);
816 btrfs_set_root_used(&root->root_item,
817 btrfs_root_used(&root->root_item) + size);
818 spin_unlock(&root->accounting_lock);
819}
820
821static void root_sub_used(struct btrfs_root *root, u32 size)
822{
823 spin_lock(&root->accounting_lock);
824 btrfs_set_root_used(&root->root_item,
825 btrfs_root_used(&root->root_item) - size);
826 spin_unlock(&root->accounting_lock);
827}
828
829
830
831
832struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
833 int slot)
834{
835 int level = btrfs_header_level(parent);
836 struct extent_buffer *eb;
837 struct btrfs_key first_key;
838
839 if (slot < 0 || slot >= btrfs_header_nritems(parent))
840 return ERR_PTR(-ENOENT);
841
842 BUG_ON(level == 0);
843
844 btrfs_node_key_to_cpu(parent, &first_key, slot);
845 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
846 btrfs_header_owner(parent),
847 btrfs_node_ptr_generation(parent, slot),
848 level - 1, &first_key);
849 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
850 free_extent_buffer(eb);
851 eb = ERR_PTR(-EIO);
852 }
853
854 return eb;
855}
856
857
858
859
860
861
862static noinline int balance_level(struct btrfs_trans_handle *trans,
863 struct btrfs_root *root,
864 struct btrfs_path *path, int level)
865{
866 struct btrfs_fs_info *fs_info = root->fs_info;
867 struct extent_buffer *right = NULL;
868 struct extent_buffer *mid;
869 struct extent_buffer *left = NULL;
870 struct extent_buffer *parent = NULL;
871 int ret = 0;
872 int wret;
873 int pslot;
874 int orig_slot = path->slots[level];
875 u64 orig_ptr;
876
877 ASSERT(level > 0);
878
879 mid = path->nodes[level];
880
881 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
882 WARN_ON(btrfs_header_generation(mid) != trans->transid);
883
884 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
885
886 if (level < BTRFS_MAX_LEVEL - 1) {
887 parent = path->nodes[level + 1];
888 pslot = path->slots[level + 1];
889 }
890
891
892
893
894
895 if (!parent) {
896 struct extent_buffer *child;
897
898 if (btrfs_header_nritems(mid) != 1)
899 return 0;
900
901
902 child = btrfs_read_node_slot(mid, 0);
903 if (IS_ERR(child)) {
904 ret = PTR_ERR(child);
905 btrfs_handle_fs_error(fs_info, ret, NULL);
906 goto enospc;
907 }
908
909 btrfs_tree_lock(child);
910 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
911 BTRFS_NESTING_COW);
912 if (ret) {
913 btrfs_tree_unlock(child);
914 free_extent_buffer(child);
915 goto enospc;
916 }
917
918 ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
919 BUG_ON(ret < 0);
920 rcu_assign_pointer(root->node, child);
921
922 add_root_to_dirty_list(root);
923 btrfs_tree_unlock(child);
924
925 path->locks[level] = 0;
926 path->nodes[level] = NULL;
927 btrfs_clean_tree_block(mid);
928 btrfs_tree_unlock(mid);
929
930 free_extent_buffer(mid);
931
932 root_sub_used(root, mid->len);
933 btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
934
935 free_extent_buffer_stale(mid);
936 return 0;
937 }
938 if (btrfs_header_nritems(mid) >
939 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
940 return 0;
941
942 left = btrfs_read_node_slot(parent, pslot - 1);
943 if (IS_ERR(left))
944 left = NULL;
945
946 if (left) {
947 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
948 wret = btrfs_cow_block(trans, root, left,
949 parent, pslot - 1, &left,
950 BTRFS_NESTING_LEFT_COW);
951 if (wret) {
952 ret = wret;
953 goto enospc;
954 }
955 }
956
957 right = btrfs_read_node_slot(parent, pslot + 1);
958 if (IS_ERR(right))
959 right = NULL;
960
961 if (right) {
962 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
963 wret = btrfs_cow_block(trans, root, right,
964 parent, pslot + 1, &right,
965 BTRFS_NESTING_RIGHT_COW);
966 if (wret) {
967 ret = wret;
968 goto enospc;
969 }
970 }
971
972
973 if (left) {
974 orig_slot += btrfs_header_nritems(left);
975 wret = push_node_left(trans, left, mid, 1);
976 if (wret < 0)
977 ret = wret;
978 }
979
980
981
982
983 if (right) {
984 wret = push_node_left(trans, mid, right, 1);
985 if (wret < 0 && wret != -ENOSPC)
986 ret = wret;
987 if (btrfs_header_nritems(right) == 0) {
988 btrfs_clean_tree_block(right);
989 btrfs_tree_unlock(right);
990 del_ptr(root, path, level + 1, pslot + 1);
991 root_sub_used(root, right->len);
992 btrfs_free_tree_block(trans, btrfs_root_id(root), right,
993 0, 1);
994 free_extent_buffer_stale(right);
995 right = NULL;
996 } else {
997 struct btrfs_disk_key right_key;
998 btrfs_node_key(right, &right_key, 0);
999 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1000 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1001 BUG_ON(ret < 0);
1002 btrfs_set_node_key(parent, &right_key, pslot + 1);
1003 btrfs_mark_buffer_dirty(parent);
1004 }
1005 }
1006 if (btrfs_header_nritems(mid) == 1) {
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016 if (!left) {
1017 ret = -EROFS;
1018 btrfs_handle_fs_error(fs_info, ret, NULL);
1019 goto enospc;
1020 }
1021 wret = balance_node_right(trans, mid, left);
1022 if (wret < 0) {
1023 ret = wret;
1024 goto enospc;
1025 }
1026 if (wret == 1) {
1027 wret = push_node_left(trans, left, mid, 1);
1028 if (wret < 0)
1029 ret = wret;
1030 }
1031 BUG_ON(wret == 1);
1032 }
1033 if (btrfs_header_nritems(mid) == 0) {
1034 btrfs_clean_tree_block(mid);
1035 btrfs_tree_unlock(mid);
1036 del_ptr(root, path, level + 1, pslot);
1037 root_sub_used(root, mid->len);
1038 btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
1039 free_extent_buffer_stale(mid);
1040 mid = NULL;
1041 } else {
1042
1043 struct btrfs_disk_key mid_key;
1044 btrfs_node_key(mid, &mid_key, 0);
1045 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1046 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1047 BUG_ON(ret < 0);
1048 btrfs_set_node_key(parent, &mid_key, pslot);
1049 btrfs_mark_buffer_dirty(parent);
1050 }
1051
1052
1053 if (left) {
1054 if (btrfs_header_nritems(left) > orig_slot) {
1055 atomic_inc(&left->refs);
1056
1057 path->nodes[level] = left;
1058 path->slots[level + 1] -= 1;
1059 path->slots[level] = orig_slot;
1060 if (mid) {
1061 btrfs_tree_unlock(mid);
1062 free_extent_buffer(mid);
1063 }
1064 } else {
1065 orig_slot -= btrfs_header_nritems(left);
1066 path->slots[level] = orig_slot;
1067 }
1068 }
1069
1070 if (orig_ptr !=
1071 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1072 BUG();
1073enospc:
1074 if (right) {
1075 btrfs_tree_unlock(right);
1076 free_extent_buffer(right);
1077 }
1078 if (left) {
1079 if (path->nodes[level] != left)
1080 btrfs_tree_unlock(left);
1081 free_extent_buffer(left);
1082 }
1083 return ret;
1084}
1085
1086
1087
1088
1089
1090static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1091 struct btrfs_root *root,
1092 struct btrfs_path *path, int level)
1093{
1094 struct btrfs_fs_info *fs_info = root->fs_info;
1095 struct extent_buffer *right = NULL;
1096 struct extent_buffer *mid;
1097 struct extent_buffer *left = NULL;
1098 struct extent_buffer *parent = NULL;
1099 int ret = 0;
1100 int wret;
1101 int pslot;
1102 int orig_slot = path->slots[level];
1103
1104 if (level == 0)
1105 return 1;
1106
1107 mid = path->nodes[level];
1108 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1109
1110 if (level < BTRFS_MAX_LEVEL - 1) {
1111 parent = path->nodes[level + 1];
1112 pslot = path->slots[level + 1];
1113 }
1114
1115 if (!parent)
1116 return 1;
1117
1118 left = btrfs_read_node_slot(parent, pslot - 1);
1119 if (IS_ERR(left))
1120 left = NULL;
1121
1122
1123 if (left) {
1124 u32 left_nr;
1125
1126 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
1127
1128 left_nr = btrfs_header_nritems(left);
1129 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
1130 wret = 1;
1131 } else {
1132 ret = btrfs_cow_block(trans, root, left, parent,
1133 pslot - 1, &left,
1134 BTRFS_NESTING_LEFT_COW);
1135 if (ret)
1136 wret = 1;
1137 else {
1138 wret = push_node_left(trans, left, mid, 0);
1139 }
1140 }
1141 if (wret < 0)
1142 ret = wret;
1143 if (wret == 0) {
1144 struct btrfs_disk_key disk_key;
1145 orig_slot += left_nr;
1146 btrfs_node_key(mid, &disk_key, 0);
1147 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1148 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1149 BUG_ON(ret < 0);
1150 btrfs_set_node_key(parent, &disk_key, pslot);
1151 btrfs_mark_buffer_dirty(parent);
1152 if (btrfs_header_nritems(left) > orig_slot) {
1153 path->nodes[level] = left;
1154 path->slots[level + 1] -= 1;
1155 path->slots[level] = orig_slot;
1156 btrfs_tree_unlock(mid);
1157 free_extent_buffer(mid);
1158 } else {
1159 orig_slot -=
1160 btrfs_header_nritems(left);
1161 path->slots[level] = orig_slot;
1162 btrfs_tree_unlock(left);
1163 free_extent_buffer(left);
1164 }
1165 return 0;
1166 }
1167 btrfs_tree_unlock(left);
1168 free_extent_buffer(left);
1169 }
1170 right = btrfs_read_node_slot(parent, pslot + 1);
1171 if (IS_ERR(right))
1172 right = NULL;
1173
1174
1175
1176
1177 if (right) {
1178 u32 right_nr;
1179
1180 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
1181
1182 right_nr = btrfs_header_nritems(right);
1183 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
1184 wret = 1;
1185 } else {
1186 ret = btrfs_cow_block(trans, root, right,
1187 parent, pslot + 1,
1188 &right, BTRFS_NESTING_RIGHT_COW);
1189 if (ret)
1190 wret = 1;
1191 else {
1192 wret = balance_node_right(trans, right, mid);
1193 }
1194 }
1195 if (wret < 0)
1196 ret = wret;
1197 if (wret == 0) {
1198 struct btrfs_disk_key disk_key;
1199
1200 btrfs_node_key(right, &disk_key, 0);
1201 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1202 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1203 BUG_ON(ret < 0);
1204 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1205 btrfs_mark_buffer_dirty(parent);
1206
1207 if (btrfs_header_nritems(mid) <= orig_slot) {
1208 path->nodes[level] = right;
1209 path->slots[level + 1] += 1;
1210 path->slots[level] = orig_slot -
1211 btrfs_header_nritems(mid);
1212 btrfs_tree_unlock(mid);
1213 free_extent_buffer(mid);
1214 } else {
1215 btrfs_tree_unlock(right);
1216 free_extent_buffer(right);
1217 }
1218 return 0;
1219 }
1220 btrfs_tree_unlock(right);
1221 free_extent_buffer(right);
1222 }
1223 return 1;
1224}
1225
1226
1227
1228
1229
1230static void reada_for_search(struct btrfs_fs_info *fs_info,
1231 struct btrfs_path *path,
1232 int level, int slot, u64 objectid)
1233{
1234 struct extent_buffer *node;
1235 struct btrfs_disk_key disk_key;
1236 u32 nritems;
1237 u64 search;
1238 u64 target;
1239 u64 nread = 0;
1240 u64 nread_max;
1241 u32 nr;
1242 u32 blocksize;
1243 u32 nscan = 0;
1244
1245 if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
1246 return;
1247
1248 if (!path->nodes[level])
1249 return;
1250
1251 node = path->nodes[level];
1252
1253
1254
1255
1256
1257
1258 if (path->reada == READA_FORWARD_ALWAYS) {
1259 if (level > 1)
1260 nread_max = node->fs_info->nodesize;
1261 else
1262 nread_max = SZ_128K;
1263 } else {
1264 nread_max = SZ_64K;
1265 }
1266
1267 search = btrfs_node_blockptr(node, slot);
1268 blocksize = fs_info->nodesize;
1269 if (path->reada != READA_FORWARD_ALWAYS) {
1270 struct extent_buffer *eb;
1271
1272 eb = find_extent_buffer(fs_info, search);
1273 if (eb) {
1274 free_extent_buffer(eb);
1275 return;
1276 }
1277 }
1278
1279 target = search;
1280
1281 nritems = btrfs_header_nritems(node);
1282 nr = slot;
1283
1284 while (1) {
1285 if (path->reada == READA_BACK) {
1286 if (nr == 0)
1287 break;
1288 nr--;
1289 } else if (path->reada == READA_FORWARD ||
1290 path->reada == READA_FORWARD_ALWAYS) {
1291 nr++;
1292 if (nr >= nritems)
1293 break;
1294 }
1295 if (path->reada == READA_BACK && objectid) {
1296 btrfs_node_key(node, &disk_key, nr);
1297 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1298 break;
1299 }
1300 search = btrfs_node_blockptr(node, nr);
1301 if (path->reada == READA_FORWARD_ALWAYS ||
1302 (search <= target && target - search <= 65536) ||
1303 (search > target && search - target <= 65536)) {
1304 btrfs_readahead_node_child(node, nr);
1305 nread += blocksize;
1306 }
1307 nscan++;
1308 if (nread > nread_max || nscan > 32)
1309 break;
1310 }
1311}
1312
1313static noinline void reada_for_balance(struct btrfs_path *path, int level)
1314{
1315 struct extent_buffer *parent;
1316 int slot;
1317 int nritems;
1318
1319 parent = path->nodes[level + 1];
1320 if (!parent)
1321 return;
1322
1323 nritems = btrfs_header_nritems(parent);
1324 slot = path->slots[level + 1];
1325
1326 if (slot > 0)
1327 btrfs_readahead_node_child(parent, slot - 1);
1328 if (slot + 1 < nritems)
1329 btrfs_readahead_node_child(parent, slot + 1);
1330}
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346static noinline void unlock_up(struct btrfs_path *path, int level,
1347 int lowest_unlock, int min_write_lock_level,
1348 int *write_lock_level)
1349{
1350 int i;
1351 int skip_level = level;
1352 bool check_skip = true;
1353
1354 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1355 if (!path->nodes[i])
1356 break;
1357 if (!path->locks[i])
1358 break;
1359
1360 if (check_skip) {
1361 if (path->slots[i] == 0) {
1362 skip_level = i + 1;
1363 continue;
1364 }
1365
1366 if (path->keep_locks) {
1367 u32 nritems;
1368
1369 nritems = btrfs_header_nritems(path->nodes[i]);
1370 if (nritems < 1 || path->slots[i] >= nritems - 1) {
1371 skip_level = i + 1;
1372 continue;
1373 }
1374 }
1375 }
1376
1377 if (i >= lowest_unlock && i > skip_level) {
1378 check_skip = false;
1379 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
1380 path->locks[i] = 0;
1381 if (write_lock_level &&
1382 i > min_write_lock_level &&
1383 i <= *write_lock_level) {
1384 *write_lock_level = i - 1;
1385 }
1386 }
1387 }
1388}
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398static int
1399read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
1400 struct extent_buffer **eb_ret, int level, int slot,
1401 const struct btrfs_key *key)
1402{
1403 struct btrfs_fs_info *fs_info = root->fs_info;
1404 u64 blocknr;
1405 u64 gen;
1406 struct extent_buffer *tmp;
1407 struct btrfs_key first_key;
1408 int ret;
1409 int parent_level;
1410
1411 blocknr = btrfs_node_blockptr(*eb_ret, slot);
1412 gen = btrfs_node_ptr_generation(*eb_ret, slot);
1413 parent_level = btrfs_header_level(*eb_ret);
1414 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
1415
1416 tmp = find_extent_buffer(fs_info, blocknr);
1417 if (tmp) {
1418 if (p->reada == READA_FORWARD_ALWAYS)
1419 reada_for_search(fs_info, p, level, slot, key->objectid);
1420
1421
1422 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
1423
1424
1425
1426
1427
1428 if (btrfs_verify_level_key(tmp,
1429 parent_level - 1, &first_key, gen)) {
1430 free_extent_buffer(tmp);
1431 return -EUCLEAN;
1432 }
1433 *eb_ret = tmp;
1434 return 0;
1435 }
1436
1437
1438 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
1439 if (!ret) {
1440 *eb_ret = tmp;
1441 return 0;
1442 }
1443 free_extent_buffer(tmp);
1444 btrfs_release_path(p);
1445 return -EIO;
1446 }
1447
1448
1449
1450
1451
1452
1453
1454
1455 btrfs_unlock_up_safe(p, level + 1);
1456
1457 if (p->reada != READA_NONE)
1458 reada_for_search(fs_info, p, level, slot, key->objectid);
1459
1460 ret = -EAGAIN;
1461 tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
1462 gen, parent_level - 1, &first_key);
1463 if (!IS_ERR(tmp)) {
1464
1465
1466
1467
1468
1469
1470 if (!extent_buffer_uptodate(tmp))
1471 ret = -EIO;
1472 free_extent_buffer(tmp);
1473 } else {
1474 ret = PTR_ERR(tmp);
1475 }
1476
1477 btrfs_release_path(p);
1478 return ret;
1479}
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490static int
1491setup_nodes_for_search(struct btrfs_trans_handle *trans,
1492 struct btrfs_root *root, struct btrfs_path *p,
1493 struct extent_buffer *b, int level, int ins_len,
1494 int *write_lock_level)
1495{
1496 struct btrfs_fs_info *fs_info = root->fs_info;
1497 int ret = 0;
1498
1499 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
1500 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
1501
1502 if (*write_lock_level < level + 1) {
1503 *write_lock_level = level + 1;
1504 btrfs_release_path(p);
1505 return -EAGAIN;
1506 }
1507
1508 reada_for_balance(p, level);
1509 ret = split_node(trans, root, p, level);
1510
1511 b = p->nodes[level];
1512 } else if (ins_len < 0 && btrfs_header_nritems(b) <
1513 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
1514
1515 if (*write_lock_level < level + 1) {
1516 *write_lock_level = level + 1;
1517 btrfs_release_path(p);
1518 return -EAGAIN;
1519 }
1520
1521 reada_for_balance(p, level);
1522 ret = balance_level(trans, root, p, level);
1523 if (ret)
1524 return ret;
1525
1526 b = p->nodes[level];
1527 if (!b) {
1528 btrfs_release_path(p);
1529 return -EAGAIN;
1530 }
1531 BUG_ON(btrfs_header_nritems(b) == 1);
1532 }
1533 return ret;
1534}
1535
1536int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
1537 u64 iobjectid, u64 ioff, u8 key_type,
1538 struct btrfs_key *found_key)
1539{
1540 int ret;
1541 struct btrfs_key key;
1542 struct extent_buffer *eb;
1543
1544 ASSERT(path);
1545 ASSERT(found_key);
1546
1547 key.type = key_type;
1548 key.objectid = iobjectid;
1549 key.offset = ioff;
1550
1551 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
1552 if (ret < 0)
1553 return ret;
1554
1555 eb = path->nodes[0];
1556 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1557 ret = btrfs_next_leaf(fs_root, path);
1558 if (ret)
1559 return ret;
1560 eb = path->nodes[0];
1561 }
1562
1563 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1564 if (found_key->type != key.type ||
1565 found_key->objectid != key.objectid)
1566 return 1;
1567
1568 return 0;
1569}
1570
1571static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
1572 struct btrfs_path *p,
1573 int write_lock_level)
1574{
1575 struct extent_buffer *b;
1576 int root_lock = 0;
1577 int level = 0;
1578
1579 if (p->search_commit_root) {
1580 b = root->commit_root;
1581 atomic_inc(&b->refs);
1582 level = btrfs_header_level(b);
1583
1584
1585
1586
1587 ASSERT(p->skip_locking == 1);
1588
1589 goto out;
1590 }
1591
1592 if (p->skip_locking) {
1593 b = btrfs_root_node(root);
1594 level = btrfs_header_level(b);
1595 goto out;
1596 }
1597
1598
1599 root_lock = BTRFS_READ_LOCK;
1600
1601
1602
1603
1604
1605 if (write_lock_level < BTRFS_MAX_LEVEL) {
1606
1607
1608
1609
1610 b = btrfs_read_lock_root_node(root);
1611 level = btrfs_header_level(b);
1612 if (level > write_lock_level)
1613 goto out;
1614
1615
1616 btrfs_tree_read_unlock(b);
1617 free_extent_buffer(b);
1618 }
1619
1620 b = btrfs_lock_root_node(root);
1621 root_lock = BTRFS_WRITE_LOCK;
1622
1623
1624 level = btrfs_header_level(b);
1625
1626out:
1627
1628
1629
1630
1631 if (!extent_buffer_uptodate(b)) {
1632 if (root_lock)
1633 btrfs_tree_unlock_rw(b, root_lock);
1634 free_extent_buffer(b);
1635 return ERR_PTR(-EIO);
1636 }
1637
1638 p->nodes[level] = b;
1639 if (!p->skip_locking)
1640 p->locks[level] = root_lock;
1641
1642
1643
1644 return b;
1645}
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659static int finish_need_commit_sem_search(struct btrfs_path *path)
1660{
1661 const int i = path->lowest_level;
1662 const int slot = path->slots[i];
1663 struct extent_buffer *lowest = path->nodes[i];
1664 struct extent_buffer *clone;
1665
1666 ASSERT(path->need_commit_sem);
1667
1668 if (!lowest)
1669 return 0;
1670
1671 lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);
1672
1673 clone = btrfs_clone_extent_buffer(lowest);
1674 if (!clone)
1675 return -ENOMEM;
1676
1677 btrfs_release_path(path);
1678 path->nodes[i] = clone;
1679 path->slots[i] = slot;
1680
1681 return 0;
1682}
1683
1684static inline int search_for_key_slot(struct extent_buffer *eb,
1685 int search_low_slot,
1686 const struct btrfs_key *key,
1687 int prev_cmp,
1688 int *slot)
1689{
1690
1691
1692
1693
1694
1695
1696
1697 if (prev_cmp == 0) {
1698 *slot = 0;
1699 return 0;
1700 }
1701
1702 return generic_bin_search(eb, search_low_slot, key, slot);
1703}
1704
1705static int search_leaf(struct btrfs_trans_handle *trans,
1706 struct btrfs_root *root,
1707 const struct btrfs_key *key,
1708 struct btrfs_path *path,
1709 int ins_len,
1710 int prev_cmp)
1711{
1712 struct extent_buffer *leaf = path->nodes[0];
1713 int leaf_free_space = -1;
1714 int search_low_slot = 0;
1715 int ret;
1716 bool do_bin_search = true;
1717
1718
1719
1720
1721
1722
1723
1724
1725 if (ins_len > 0) {
1726
1727
1728
1729
1730 leaf_free_space = btrfs_leaf_free_space(leaf);
1731
1732
1733
1734
1735
1736 if (path->locks[1] && leaf_free_space >= ins_len) {
1737 struct btrfs_disk_key first_key;
1738
1739 ASSERT(btrfs_header_nritems(leaf) > 0);
1740 btrfs_item_key(leaf, &first_key, 0);
1741
1742
1743
1744
1745
1746
1747
1748
1749 ret = comp_keys(&first_key, key);
1750 if (ret < 0) {
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763 btrfs_unlock_up_safe(path, 1);
1764 search_low_slot = 1;
1765 } else {
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780 if (ret == 0)
1781 btrfs_unlock_up_safe(path, 1);
1782
1783
1784
1785
1786
1787 do_bin_search = false;
1788 path->slots[0] = 0;
1789 }
1790 }
1791 }
1792
1793 if (do_bin_search) {
1794 ret = search_for_key_slot(leaf, search_low_slot, key,
1795 prev_cmp, &path->slots[0]);
1796 if (ret < 0)
1797 return ret;
1798 }
1799
1800 if (ins_len > 0) {
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810 if (ret == 0 && !path->search_for_extension) {
1811 ASSERT(ins_len >= sizeof(struct btrfs_item));
1812 ins_len -= sizeof(struct btrfs_item);
1813 }
1814
1815 ASSERT(leaf_free_space >= 0);
1816
1817 if (leaf_free_space < ins_len) {
1818 int err;
1819
1820 err = split_leaf(trans, root, key, path, ins_len,
1821 (ret == 0));
1822 ASSERT(err <= 0);
1823 if (WARN_ON(err > 0))
1824 err = -EUCLEAN;
1825 if (err)
1826 ret = err;
1827 }
1828 }
1829
1830 return ret;
1831}
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1865 const struct btrfs_key *key, struct btrfs_path *p,
1866 int ins_len, int cow)
1867{
1868 struct btrfs_fs_info *fs_info = root->fs_info;
1869 struct extent_buffer *b;
1870 int slot;
1871 int ret;
1872 int err;
1873 int level;
1874 int lowest_unlock = 1;
1875
1876 int write_lock_level = 0;
1877 u8 lowest_level = 0;
1878 int min_write_lock_level;
1879 int prev_cmp;
1880
1881 lowest_level = p->lowest_level;
1882 WARN_ON(lowest_level && ins_len > 0);
1883 WARN_ON(p->nodes[0] != NULL);
1884 BUG_ON(!cow && ins_len);
1885
1886 if (ins_len < 0) {
1887 lowest_unlock = 2;
1888
1889
1890
1891
1892
1893 write_lock_level = 2;
1894 } else if (ins_len > 0) {
1895
1896
1897
1898
1899 write_lock_level = 1;
1900 }
1901
1902 if (!cow)
1903 write_lock_level = -1;
1904
1905 if (cow && (p->keep_locks || p->lowest_level))
1906 write_lock_level = BTRFS_MAX_LEVEL;
1907
1908 min_write_lock_level = write_lock_level;
1909
1910 if (p->need_commit_sem) {
1911 ASSERT(p->search_commit_root);
1912 down_read(&fs_info->commit_root_sem);
1913 }
1914
1915again:
1916 prev_cmp = -1;
1917 b = btrfs_search_slot_get_root(root, p, write_lock_level);
1918 if (IS_ERR(b)) {
1919 ret = PTR_ERR(b);
1920 goto done;
1921 }
1922
1923 while (b) {
1924 int dec = 0;
1925
1926 level = btrfs_header_level(b);
1927
1928 if (cow) {
1929 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
1930
1931
1932
1933
1934
1935
1936 if (!should_cow_block(trans, root, b))
1937 goto cow_done;
1938
1939
1940
1941
1942
1943 if (level > write_lock_level ||
1944 (level + 1 > write_lock_level &&
1945 level + 1 < BTRFS_MAX_LEVEL &&
1946 p->nodes[level + 1])) {
1947 write_lock_level = level + 1;
1948 btrfs_release_path(p);
1949 goto again;
1950 }
1951
1952 if (last_level)
1953 err = btrfs_cow_block(trans, root, b, NULL, 0,
1954 &b,
1955 BTRFS_NESTING_COW);
1956 else
1957 err = btrfs_cow_block(trans, root, b,
1958 p->nodes[level + 1],
1959 p->slots[level + 1], &b,
1960 BTRFS_NESTING_COW);
1961 if (err) {
1962 ret = err;
1963 goto done;
1964 }
1965 }
1966cow_done:
1967 p->nodes[level] = b;
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980 if (!ins_len && !p->keep_locks) {
1981 int u = level + 1;
1982
1983 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
1984 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
1985 p->locks[u] = 0;
1986 }
1987 }
1988
1989 if (level == 0) {
1990 if (ins_len > 0)
1991 ASSERT(write_lock_level >= 1);
1992
1993 ret = search_leaf(trans, root, key, p, ins_len, prev_cmp);
1994 if (!p->search_for_split)
1995 unlock_up(p, level, lowest_unlock,
1996 min_write_lock_level, NULL);
1997 goto done;
1998 }
1999
2000 ret = search_for_key_slot(b, 0, key, prev_cmp, &slot);
2001 if (ret < 0)
2002 goto done;
2003 prev_cmp = ret;
2004
2005 if (ret && slot > 0) {
2006 dec = 1;
2007 slot--;
2008 }
2009 p->slots[level] = slot;
2010 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
2011 &write_lock_level);
2012 if (err == -EAGAIN)
2013 goto again;
2014 if (err) {
2015 ret = err;
2016 goto done;
2017 }
2018 b = p->nodes[level];
2019 slot = p->slots[level];
2020
2021
2022
2023
2024
2025
2026 if (slot == 0 && ins_len && write_lock_level < level + 1) {
2027 write_lock_level = level + 1;
2028 btrfs_release_path(p);
2029 goto again;
2030 }
2031
2032 unlock_up(p, level, lowest_unlock, min_write_lock_level,
2033 &write_lock_level);
2034
2035 if (level == lowest_level) {
2036 if (dec)
2037 p->slots[level]++;
2038 goto done;
2039 }
2040
2041 err = read_block_for_search(root, p, &b, level, slot, key);
2042 if (err == -EAGAIN)
2043 goto again;
2044 if (err) {
2045 ret = err;
2046 goto done;
2047 }
2048
2049 if (!p->skip_locking) {
2050 level = btrfs_header_level(b);
2051 if (level <= write_lock_level) {
2052 btrfs_tree_lock(b);
2053 p->locks[level] = BTRFS_WRITE_LOCK;
2054 } else {
2055 btrfs_tree_read_lock(b);
2056 p->locks[level] = BTRFS_READ_LOCK;
2057 }
2058 p->nodes[level] = b;
2059 }
2060 }
2061 ret = 1;
2062done:
2063 if (ret < 0 && !p->skip_release_on_error)
2064 btrfs_release_path(p);
2065
2066 if (p->need_commit_sem) {
2067 int ret2;
2068
2069 ret2 = finish_need_commit_sem_search(p);
2070 up_read(&fs_info->commit_root_sem);
2071 if (ret2)
2072 ret = ret2;
2073 }
2074
2075 return ret;
2076}
2077ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
2091 struct btrfs_path *p, u64 time_seq)
2092{
2093 struct btrfs_fs_info *fs_info = root->fs_info;
2094 struct extent_buffer *b;
2095 int slot;
2096 int ret;
2097 int err;
2098 int level;
2099 int lowest_unlock = 1;
2100 u8 lowest_level = 0;
2101
2102 lowest_level = p->lowest_level;
2103 WARN_ON(p->nodes[0] != NULL);
2104
2105 if (p->search_commit_root) {
2106 BUG_ON(time_seq);
2107 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2108 }
2109
2110again:
2111 b = btrfs_get_old_root(root, time_seq);
2112 if (!b) {
2113 ret = -EIO;
2114 goto done;
2115 }
2116 level = btrfs_header_level(b);
2117 p->locks[level] = BTRFS_READ_LOCK;
2118
2119 while (b) {
2120 int dec = 0;
2121
2122 level = btrfs_header_level(b);
2123 p->nodes[level] = b;
2124
2125
2126
2127
2128
2129
2130
2131 btrfs_unlock_up_safe(p, level + 1);
2132
2133 ret = btrfs_bin_search(b, key, &slot);
2134 if (ret < 0)
2135 goto done;
2136
2137 if (level == 0) {
2138 p->slots[level] = slot;
2139 unlock_up(p, level, lowest_unlock, 0, NULL);
2140 goto done;
2141 }
2142
2143 if (ret && slot > 0) {
2144 dec = 1;
2145 slot--;
2146 }
2147 p->slots[level] = slot;
2148 unlock_up(p, level, lowest_unlock, 0, NULL);
2149
2150 if (level == lowest_level) {
2151 if (dec)
2152 p->slots[level]++;
2153 goto done;
2154 }
2155
2156 err = read_block_for_search(root, p, &b, level, slot, key);
2157 if (err == -EAGAIN)
2158 goto again;
2159 if (err) {
2160 ret = err;
2161 goto done;
2162 }
2163
2164 level = btrfs_header_level(b);
2165 btrfs_tree_read_lock(b);
2166 b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
2167 if (!b) {
2168 ret = -ENOMEM;
2169 goto done;
2170 }
2171 p->locks[level] = BTRFS_READ_LOCK;
2172 p->nodes[level] = b;
2173 }
2174 ret = 1;
2175done:
2176 if (ret < 0)
2177 btrfs_release_path(p);
2178
2179 return ret;
2180}
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194int btrfs_search_slot_for_read(struct btrfs_root *root,
2195 const struct btrfs_key *key,
2196 struct btrfs_path *p, int find_higher,
2197 int return_any)
2198{
2199 int ret;
2200 struct extent_buffer *leaf;
2201
2202again:
2203 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
2204 if (ret <= 0)
2205 return ret;
2206
2207
2208
2209
2210
2211
2212
2213 leaf = p->nodes[0];
2214
2215 if (find_higher) {
2216 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
2217 ret = btrfs_next_leaf(root, p);
2218 if (ret <= 0)
2219 return ret;
2220 if (!return_any)
2221 return 1;
2222
2223
2224
2225
2226 return_any = 0;
2227 find_higher = 0;
2228 btrfs_release_path(p);
2229 goto again;
2230 }
2231 } else {
2232 if (p->slots[0] == 0) {
2233 ret = btrfs_prev_leaf(root, p);
2234 if (ret < 0)
2235 return ret;
2236 if (!ret) {
2237 leaf = p->nodes[0];
2238 if (p->slots[0] == btrfs_header_nritems(leaf))
2239 p->slots[0]--;
2240 return 0;
2241 }
2242 if (!return_any)
2243 return 1;
2244
2245
2246
2247
2248 return_any = 0;
2249 find_higher = 1;
2250 btrfs_release_path(p);
2251 goto again;
2252 } else {
2253 --p->slots[0];
2254 }
2255 }
2256 return 0;
2257}
2258
2259
2260
2261
2262
2263
2264
2265int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
2266 struct btrfs_path *path)
2267{
2268 int ret;
2269
2270 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2271 if (ret > 0)
2272 ret = btrfs_previous_item(root, path, key->objectid, key->type);
2273
2274 if (ret == 0)
2275 btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
2276
2277 return ret;
2278}
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288static void fixup_low_keys(struct btrfs_path *path,
2289 struct btrfs_disk_key *key, int level)
2290{
2291 int i;
2292 struct extent_buffer *t;
2293 int ret;
2294
2295 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2296 int tslot = path->slots[i];
2297
2298 if (!path->nodes[i])
2299 break;
2300 t = path->nodes[i];
2301 ret = btrfs_tree_mod_log_insert_key(t, tslot,
2302 BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
2303 BUG_ON(ret < 0);
2304 btrfs_set_node_key(t, key, tslot);
2305 btrfs_mark_buffer_dirty(path->nodes[i]);
2306 if (tslot != 0)
2307 break;
2308 }
2309}
2310
2311
2312
2313
2314
2315
2316
2317void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
2318 struct btrfs_path *path,
2319 const struct btrfs_key *new_key)
2320{
2321 struct btrfs_disk_key disk_key;
2322 struct extent_buffer *eb;
2323 int slot;
2324
2325 eb = path->nodes[0];
2326 slot = path->slots[0];
2327 if (slot > 0) {
2328 btrfs_item_key(eb, &disk_key, slot - 1);
2329 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
2330 btrfs_crit(fs_info,
2331 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2332 slot, btrfs_disk_key_objectid(&disk_key),
2333 btrfs_disk_key_type(&disk_key),
2334 btrfs_disk_key_offset(&disk_key),
2335 new_key->objectid, new_key->type,
2336 new_key->offset);
2337 btrfs_print_leaf(eb);
2338 BUG();
2339 }
2340 }
2341 if (slot < btrfs_header_nritems(eb) - 1) {
2342 btrfs_item_key(eb, &disk_key, slot + 1);
2343 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
2344 btrfs_crit(fs_info,
2345 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2346 slot, btrfs_disk_key_objectid(&disk_key),
2347 btrfs_disk_key_type(&disk_key),
2348 btrfs_disk_key_offset(&disk_key),
2349 new_key->objectid, new_key->type,
2350 new_key->offset);
2351 btrfs_print_leaf(eb);
2352 BUG();
2353 }
2354 }
2355
2356 btrfs_cpu_key_to_disk(&disk_key, new_key);
2357 btrfs_set_item_key(eb, &disk_key, slot);
2358 btrfs_mark_buffer_dirty(eb);
2359 if (slot == 0)
2360 fixup_low_keys(path, &disk_key, 1);
2361}
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383static bool check_sibling_keys(struct extent_buffer *left,
2384 struct extent_buffer *right)
2385{
2386 struct btrfs_key left_last;
2387 struct btrfs_key right_first;
2388 int level = btrfs_header_level(left);
2389 int nr_left = btrfs_header_nritems(left);
2390 int nr_right = btrfs_header_nritems(right);
2391
2392
2393 if (!nr_left || !nr_right)
2394 return false;
2395
2396 if (level) {
2397 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
2398 btrfs_node_key_to_cpu(right, &right_first, 0);
2399 } else {
2400 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
2401 btrfs_item_key_to_cpu(right, &right_first, 0);
2402 }
2403
2404 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
2405 btrfs_crit(left->fs_info,
2406"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
2407 left_last.objectid, left_last.type,
2408 left_last.offset, right_first.objectid,
2409 right_first.type, right_first.offset);
2410 return true;
2411 }
2412 return false;
2413}
2414
2415
2416
2417
2418
2419
2420
2421
2422static int push_node_left(struct btrfs_trans_handle *trans,
2423 struct extent_buffer *dst,
2424 struct extent_buffer *src, int empty)
2425{
2426 struct btrfs_fs_info *fs_info = trans->fs_info;
2427 int push_items = 0;
2428 int src_nritems;
2429 int dst_nritems;
2430 int ret = 0;
2431
2432 src_nritems = btrfs_header_nritems(src);
2433 dst_nritems = btrfs_header_nritems(dst);
2434 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2435 WARN_ON(btrfs_header_generation(src) != trans->transid);
2436 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2437
2438 if (!empty && src_nritems <= 8)
2439 return 1;
2440
2441 if (push_items <= 0)
2442 return 1;
2443
2444 if (empty) {
2445 push_items = min(src_nritems, push_items);
2446 if (push_items < src_nritems) {
2447
2448
2449
2450 if (src_nritems - push_items < 8) {
2451 if (push_items <= 8)
2452 return 1;
2453 push_items -= 8;
2454 }
2455 }
2456 } else
2457 push_items = min(src_nritems - 8, push_items);
2458
2459
2460 if (check_sibling_keys(dst, src)) {
2461 ret = -EUCLEAN;
2462 btrfs_abort_transaction(trans, ret);
2463 return ret;
2464 }
2465 ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
2466 if (ret) {
2467 btrfs_abort_transaction(trans, ret);
2468 return ret;
2469 }
2470 copy_extent_buffer(dst, src,
2471 btrfs_node_key_ptr_offset(dst_nritems),
2472 btrfs_node_key_ptr_offset(0),
2473 push_items * sizeof(struct btrfs_key_ptr));
2474
2475 if (push_items < src_nritems) {
2476
2477
2478
2479
2480 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2481 btrfs_node_key_ptr_offset(push_items),
2482 (src_nritems - push_items) *
2483 sizeof(struct btrfs_key_ptr));
2484 }
2485 btrfs_set_header_nritems(src, src_nritems - push_items);
2486 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2487 btrfs_mark_buffer_dirty(src);
2488 btrfs_mark_buffer_dirty(dst);
2489
2490 return ret;
2491}
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502static int balance_node_right(struct btrfs_trans_handle *trans,
2503 struct extent_buffer *dst,
2504 struct extent_buffer *src)
2505{
2506 struct btrfs_fs_info *fs_info = trans->fs_info;
2507 int push_items = 0;
2508 int max_push;
2509 int src_nritems;
2510 int dst_nritems;
2511 int ret = 0;
2512
2513 WARN_ON(btrfs_header_generation(src) != trans->transid);
2514 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2515
2516 src_nritems = btrfs_header_nritems(src);
2517 dst_nritems = btrfs_header_nritems(dst);
2518 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2519 if (push_items <= 0)
2520 return 1;
2521
2522 if (src_nritems < 4)
2523 return 1;
2524
2525 max_push = src_nritems / 2 + 1;
2526
2527 if (max_push >= src_nritems)
2528 return 1;
2529
2530 if (max_push < push_items)
2531 push_items = max_push;
2532
2533
2534 if (check_sibling_keys(src, dst)) {
2535 ret = -EUCLEAN;
2536 btrfs_abort_transaction(trans, ret);
2537 return ret;
2538 }
2539 ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
2540 BUG_ON(ret < 0);
2541 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2542 btrfs_node_key_ptr_offset(0),
2543 (dst_nritems) *
2544 sizeof(struct btrfs_key_ptr));
2545
2546 ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
2547 push_items);
2548 if (ret) {
2549 btrfs_abort_transaction(trans, ret);
2550 return ret;
2551 }
2552 copy_extent_buffer(dst, src,
2553 btrfs_node_key_ptr_offset(0),
2554 btrfs_node_key_ptr_offset(src_nritems - push_items),
2555 push_items * sizeof(struct btrfs_key_ptr));
2556
2557 btrfs_set_header_nritems(src, src_nritems - push_items);
2558 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2559
2560 btrfs_mark_buffer_dirty(src);
2561 btrfs_mark_buffer_dirty(dst);
2562
2563 return ret;
2564}
2565
2566
2567
2568
2569
2570
2571
2572
2573static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2574 struct btrfs_root *root,
2575 struct btrfs_path *path, int level)
2576{
2577 struct btrfs_fs_info *fs_info = root->fs_info;
2578 u64 lower_gen;
2579 struct extent_buffer *lower;
2580 struct extent_buffer *c;
2581 struct extent_buffer *old;
2582 struct btrfs_disk_key lower_key;
2583 int ret;
2584
2585 BUG_ON(path->nodes[level]);
2586 BUG_ON(path->nodes[level-1] != root->node);
2587
2588 lower = path->nodes[level-1];
2589 if (level == 1)
2590 btrfs_item_key(lower, &lower_key, 0);
2591 else
2592 btrfs_node_key(lower, &lower_key, 0);
2593
2594 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2595 &lower_key, level, root->node->start, 0,
2596 BTRFS_NESTING_NEW_ROOT);
2597 if (IS_ERR(c))
2598 return PTR_ERR(c);
2599
2600 root_add_used(root, fs_info->nodesize);
2601
2602 btrfs_set_header_nritems(c, 1);
2603 btrfs_set_node_key(c, &lower_key, 0);
2604 btrfs_set_node_blockptr(c, 0, lower->start);
2605 lower_gen = btrfs_header_generation(lower);
2606 WARN_ON(lower_gen != trans->transid);
2607
2608 btrfs_set_node_ptr_generation(c, 0, lower_gen);
2609
2610 btrfs_mark_buffer_dirty(c);
2611
2612 old = root->node;
2613 ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
2614 BUG_ON(ret < 0);
2615 rcu_assign_pointer(root->node, c);
2616
2617
2618 free_extent_buffer(old);
2619
2620 add_root_to_dirty_list(root);
2621 atomic_inc(&c->refs);
2622 path->nodes[level] = c;
2623 path->locks[level] = BTRFS_WRITE_LOCK;
2624 path->slots[level] = 0;
2625 return 0;
2626}
2627
2628
2629
2630
2631
2632
2633
2634
2635static void insert_ptr(struct btrfs_trans_handle *trans,
2636 struct btrfs_path *path,
2637 struct btrfs_disk_key *key, u64 bytenr,
2638 int slot, int level)
2639{
2640 struct extent_buffer *lower;
2641 int nritems;
2642 int ret;
2643
2644 BUG_ON(!path->nodes[level]);
2645 btrfs_assert_tree_write_locked(path->nodes[level]);
2646 lower = path->nodes[level];
2647 nritems = btrfs_header_nritems(lower);
2648 BUG_ON(slot > nritems);
2649 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
2650 if (slot != nritems) {
2651 if (level) {
2652 ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
2653 slot, nritems - slot);
2654 BUG_ON(ret < 0);
2655 }
2656 memmove_extent_buffer(lower,
2657 btrfs_node_key_ptr_offset(slot + 1),
2658 btrfs_node_key_ptr_offset(slot),
2659 (nritems - slot) * sizeof(struct btrfs_key_ptr));
2660 }
2661 if (level) {
2662 ret = btrfs_tree_mod_log_insert_key(lower, slot,
2663 BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
2664 BUG_ON(ret < 0);
2665 }
2666 btrfs_set_node_key(lower, key, slot);
2667 btrfs_set_node_blockptr(lower, slot, bytenr);
2668 WARN_ON(trans->transid == 0);
2669 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2670 btrfs_set_header_nritems(lower, nritems + 1);
2671 btrfs_mark_buffer_dirty(lower);
2672}
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683static noinline int split_node(struct btrfs_trans_handle *trans,
2684 struct btrfs_root *root,
2685 struct btrfs_path *path, int level)
2686{
2687 struct btrfs_fs_info *fs_info = root->fs_info;
2688 struct extent_buffer *c;
2689 struct extent_buffer *split;
2690 struct btrfs_disk_key disk_key;
2691 int mid;
2692 int ret;
2693 u32 c_nritems;
2694
2695 c = path->nodes[level];
2696 WARN_ON(btrfs_header_generation(c) != trans->transid);
2697 if (c == root->node) {
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708 ret = insert_new_root(trans, root, path, level + 1);
2709 if (ret)
2710 return ret;
2711 } else {
2712 ret = push_nodes_for_insert(trans, root, path, level);
2713 c = path->nodes[level];
2714 if (!ret && btrfs_header_nritems(c) <
2715 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
2716 return 0;
2717 if (ret < 0)
2718 return ret;
2719 }
2720
2721 c_nritems = btrfs_header_nritems(c);
2722 mid = (c_nritems + 1) / 2;
2723 btrfs_node_key(c, &disk_key, mid);
2724
2725 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2726 &disk_key, level, c->start, 0,
2727 BTRFS_NESTING_SPLIT);
2728 if (IS_ERR(split))
2729 return PTR_ERR(split);
2730
2731 root_add_used(root, fs_info->nodesize);
2732 ASSERT(btrfs_header_level(c) == level);
2733
2734 ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
2735 if (ret) {
2736 btrfs_abort_transaction(trans, ret);
2737 return ret;
2738 }
2739 copy_extent_buffer(split, c,
2740 btrfs_node_key_ptr_offset(0),
2741 btrfs_node_key_ptr_offset(mid),
2742 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2743 btrfs_set_header_nritems(split, c_nritems - mid);
2744 btrfs_set_header_nritems(c, mid);
2745
2746 btrfs_mark_buffer_dirty(c);
2747 btrfs_mark_buffer_dirty(split);
2748
2749 insert_ptr(trans, path, &disk_key, split->start,
2750 path->slots[level + 1] + 1, level + 1);
2751
2752 if (path->slots[level] >= mid) {
2753 path->slots[level] -= mid;
2754 btrfs_tree_unlock(c);
2755 free_extent_buffer(c);
2756 path->nodes[level] = split;
2757 path->slots[level + 1] += 1;
2758 } else {
2759 btrfs_tree_unlock(split);
2760 free_extent_buffer(split);
2761 }
2762 return 0;
2763}
2764
2765
2766
2767
2768
2769
2770static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2771{
2772 int data_len;
2773 int nritems = btrfs_header_nritems(l);
2774 int end = min(nritems, start + nr) - 1;
2775
2776 if (!nr)
2777 return 0;
2778 data_len = btrfs_item_offset(l, start) + btrfs_item_size(l, start);
2779 data_len = data_len - btrfs_item_offset(l, end);
2780 data_len += sizeof(struct btrfs_item) * nr;
2781 WARN_ON(data_len < 0);
2782 return data_len;
2783}
2784
2785
2786
2787
2788
2789
2790noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
2791{
2792 struct btrfs_fs_info *fs_info = leaf->fs_info;
2793 int nritems = btrfs_header_nritems(leaf);
2794 int ret;
2795
2796 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
2797 if (ret < 0) {
2798 btrfs_crit(fs_info,
2799 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
2800 ret,
2801 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
2802 leaf_space_used(leaf, 0, nritems), nritems);
2803 }
2804 return ret;
2805}
2806
2807
2808
2809
2810
2811static noinline int __push_leaf_right(struct btrfs_path *path,
2812 int data_size, int empty,
2813 struct extent_buffer *right,
2814 int free_space, u32 left_nritems,
2815 u32 min_slot)
2816{
2817 struct btrfs_fs_info *fs_info = right->fs_info;
2818 struct extent_buffer *left = path->nodes[0];
2819 struct extent_buffer *upper = path->nodes[1];
2820 struct btrfs_map_token token;
2821 struct btrfs_disk_key disk_key;
2822 int slot;
2823 u32 i;
2824 int push_space = 0;
2825 int push_items = 0;
2826 u32 nr;
2827 u32 right_nritems;
2828 u32 data_end;
2829 u32 this_item_size;
2830
2831 if (empty)
2832 nr = 0;
2833 else
2834 nr = max_t(u32, 1, min_slot);
2835
2836 if (path->slots[0] >= left_nritems)
2837 push_space += data_size;
2838
2839 slot = path->slots[1];
2840 i = left_nritems - 1;
2841 while (i >= nr) {
2842 if (!empty && push_items > 0) {
2843 if (path->slots[0] > i)
2844 break;
2845 if (path->slots[0] == i) {
2846 int space = btrfs_leaf_free_space(left);
2847
2848 if (space + push_space * 2 > free_space)
2849 break;
2850 }
2851 }
2852
2853 if (path->slots[0] == i)
2854 push_space += data_size;
2855
2856 this_item_size = btrfs_item_size(left, i);
2857 if (this_item_size + sizeof(struct btrfs_item) +
2858 push_space > free_space)
2859 break;
2860
2861 push_items++;
2862 push_space += this_item_size + sizeof(struct btrfs_item);
2863 if (i == 0)
2864 break;
2865 i--;
2866 }
2867
2868 if (push_items == 0)
2869 goto out_unlock;
2870
2871 WARN_ON(!empty && push_items == left_nritems);
2872
2873
2874 right_nritems = btrfs_header_nritems(right);
2875
2876 push_space = btrfs_item_data_end(left, left_nritems - push_items);
2877 push_space -= leaf_data_end(left);
2878
2879
2880 data_end = leaf_data_end(right);
2881 memmove_extent_buffer(right,
2882 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
2883 BTRFS_LEAF_DATA_OFFSET + data_end,
2884 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
2885
2886
2887 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
2888 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
2889 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
2890 push_space);
2891
2892 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2893 btrfs_item_nr_offset(0),
2894 right_nritems * sizeof(struct btrfs_item));
2895
2896
2897 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2898 btrfs_item_nr_offset(left_nritems - push_items),
2899 push_items * sizeof(struct btrfs_item));
2900
2901
2902 btrfs_init_map_token(&token, right);
2903 right_nritems += push_items;
2904 btrfs_set_header_nritems(right, right_nritems);
2905 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
2906 for (i = 0; i < right_nritems; i++) {
2907 push_space -= btrfs_token_item_size(&token, i);
2908 btrfs_set_token_item_offset(&token, i, push_space);
2909 }
2910
2911 left_nritems -= push_items;
2912 btrfs_set_header_nritems(left, left_nritems);
2913
2914 if (left_nritems)
2915 btrfs_mark_buffer_dirty(left);
2916 else
2917 btrfs_clean_tree_block(left);
2918
2919 btrfs_mark_buffer_dirty(right);
2920
2921 btrfs_item_key(right, &disk_key, 0);
2922 btrfs_set_node_key(upper, &disk_key, slot + 1);
2923 btrfs_mark_buffer_dirty(upper);
2924
2925
2926 if (path->slots[0] >= left_nritems) {
2927 path->slots[0] -= left_nritems;
2928 if (btrfs_header_nritems(path->nodes[0]) == 0)
2929 btrfs_clean_tree_block(path->nodes[0]);
2930 btrfs_tree_unlock(path->nodes[0]);
2931 free_extent_buffer(path->nodes[0]);
2932 path->nodes[0] = right;
2933 path->slots[1] += 1;
2934 } else {
2935 btrfs_tree_unlock(right);
2936 free_extent_buffer(right);
2937 }
2938 return 0;
2939
2940out_unlock:
2941 btrfs_tree_unlock(right);
2942 free_extent_buffer(right);
2943 return 1;
2944}
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2957 *root, struct btrfs_path *path,
2958 int min_data_size, int data_size,
2959 int empty, u32 min_slot)
2960{
2961 struct extent_buffer *left = path->nodes[0];
2962 struct extent_buffer *right;
2963 struct extent_buffer *upper;
2964 int slot;
2965 int free_space;
2966 u32 left_nritems;
2967 int ret;
2968
2969 if (!path->nodes[1])
2970 return 1;
2971
2972 slot = path->slots[1];
2973 upper = path->nodes[1];
2974 if (slot >= btrfs_header_nritems(upper) - 1)
2975 return 1;
2976
2977 btrfs_assert_tree_write_locked(path->nodes[1]);
2978
2979 right = btrfs_read_node_slot(upper, slot + 1);
2980
2981
2982
2983
2984 if (IS_ERR(right))
2985 return 1;
2986
2987 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
2988
2989 free_space = btrfs_leaf_free_space(right);
2990 if (free_space < data_size)
2991 goto out_unlock;
2992
2993
2994 ret = btrfs_cow_block(trans, root, right, upper,
2995 slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
2996 if (ret)
2997 goto out_unlock;
2998
2999 free_space = btrfs_leaf_free_space(right);
3000 if (free_space < data_size)
3001 goto out_unlock;
3002
3003 left_nritems = btrfs_header_nritems(left);
3004 if (left_nritems == 0)
3005 goto out_unlock;
3006
3007 if (check_sibling_keys(left, right)) {
3008 ret = -EUCLEAN;
3009 btrfs_tree_unlock(right);
3010 free_extent_buffer(right);
3011 return ret;
3012 }
3013 if (path->slots[0] == left_nritems && !empty) {
3014
3015
3016
3017
3018 btrfs_tree_unlock(left);
3019 free_extent_buffer(left);
3020 path->nodes[0] = right;
3021 path->slots[0] = 0;
3022 path->slots[1]++;
3023 return 0;
3024 }
3025
3026 return __push_leaf_right(path, min_data_size, empty,
3027 right, free_space, left_nritems, min_slot);
3028out_unlock:
3029 btrfs_tree_unlock(right);
3030 free_extent_buffer(right);
3031 return 1;
3032}
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
3043 int empty, struct extent_buffer *left,
3044 int free_space, u32 right_nritems,
3045 u32 max_slot)
3046{
3047 struct btrfs_fs_info *fs_info = left->fs_info;
3048 struct btrfs_disk_key disk_key;
3049 struct extent_buffer *right = path->nodes[0];
3050 int i;
3051 int push_space = 0;
3052 int push_items = 0;
3053 u32 old_left_nritems;
3054 u32 nr;
3055 int ret = 0;
3056 u32 this_item_size;
3057 u32 old_left_item_size;
3058 struct btrfs_map_token token;
3059
3060 if (empty)
3061 nr = min(right_nritems, max_slot);
3062 else
3063 nr = min(right_nritems - 1, max_slot);
3064
3065 for (i = 0; i < nr; i++) {
3066 if (!empty && push_items > 0) {
3067 if (path->slots[0] < i)
3068 break;
3069 if (path->slots[0] == i) {
3070 int space = btrfs_leaf_free_space(right);
3071
3072 if (space + push_space * 2 > free_space)
3073 break;
3074 }
3075 }
3076
3077 if (path->slots[0] == i)
3078 push_space += data_size;
3079
3080 this_item_size = btrfs_item_size(right, i);
3081 if (this_item_size + sizeof(struct btrfs_item) + push_space >
3082 free_space)
3083 break;
3084
3085 push_items++;
3086 push_space += this_item_size + sizeof(struct btrfs_item);
3087 }
3088
3089 if (push_items == 0) {
3090 ret = 1;
3091 goto out;
3092 }
3093 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
3094
3095
3096 copy_extent_buffer(left, right,
3097 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3098 btrfs_item_nr_offset(0),
3099 push_items * sizeof(struct btrfs_item));
3100
3101 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
3102 btrfs_item_offset(right, push_items - 1);
3103
3104 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
3105 leaf_data_end(left) - push_space,
3106 BTRFS_LEAF_DATA_OFFSET +
3107 btrfs_item_offset(right, push_items - 1),
3108 push_space);
3109 old_left_nritems = btrfs_header_nritems(left);
3110 BUG_ON(old_left_nritems <= 0);
3111
3112 btrfs_init_map_token(&token, left);
3113 old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
3114 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
3115 u32 ioff;
3116
3117 ioff = btrfs_token_item_offset(&token, i);
3118 btrfs_set_token_item_offset(&token, i,
3119 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
3120 }
3121 btrfs_set_header_nritems(left, old_left_nritems + push_items);
3122
3123
3124 if (push_items > right_nritems)
3125 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
3126 right_nritems);
3127
3128 if (push_items < right_nritems) {
3129 push_space = btrfs_item_offset(right, push_items - 1) -
3130 leaf_data_end(right);
3131 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
3132 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
3133 BTRFS_LEAF_DATA_OFFSET +
3134 leaf_data_end(right), push_space);
3135
3136 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
3137 btrfs_item_nr_offset(push_items),
3138 (btrfs_header_nritems(right) - push_items) *
3139 sizeof(struct btrfs_item));
3140 }
3141
3142 btrfs_init_map_token(&token, right);
3143 right_nritems -= push_items;
3144 btrfs_set_header_nritems(right, right_nritems);
3145 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
3146 for (i = 0; i < right_nritems; i++) {
3147 push_space = push_space - btrfs_token_item_size(&token, i);
3148 btrfs_set_token_item_offset(&token, i, push_space);
3149 }
3150
3151 btrfs_mark_buffer_dirty(left);
3152 if (right_nritems)
3153 btrfs_mark_buffer_dirty(right);
3154 else
3155 btrfs_clean_tree_block(right);
3156
3157 btrfs_item_key(right, &disk_key, 0);
3158 fixup_low_keys(path, &disk_key, 1);
3159
3160
3161 if (path->slots[0] < push_items) {
3162 path->slots[0] += old_left_nritems;
3163 btrfs_tree_unlock(path->nodes[0]);
3164 free_extent_buffer(path->nodes[0]);
3165 path->nodes[0] = left;
3166 path->slots[1] -= 1;
3167 } else {
3168 btrfs_tree_unlock(left);
3169 free_extent_buffer(left);
3170 path->slots[0] -= push_items;
3171 }
3172 BUG_ON(path->slots[0] < 0);
3173 return ret;
3174out:
3175 btrfs_tree_unlock(left);
3176 free_extent_buffer(left);
3177 return ret;
3178}
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3189 *root, struct btrfs_path *path, int min_data_size,
3190 int data_size, int empty, u32 max_slot)
3191{
3192 struct extent_buffer *right = path->nodes[0];
3193 struct extent_buffer *left;
3194 int slot;
3195 int free_space;
3196 u32 right_nritems;
3197 int ret = 0;
3198
3199 slot = path->slots[1];
3200 if (slot == 0)
3201 return 1;
3202 if (!path->nodes[1])
3203 return 1;
3204
3205 right_nritems = btrfs_header_nritems(right);
3206 if (right_nritems == 0)
3207 return 1;
3208
3209 btrfs_assert_tree_write_locked(path->nodes[1]);
3210
3211 left = btrfs_read_node_slot(path->nodes[1], slot - 1);
3212
3213
3214
3215
3216 if (IS_ERR(left))
3217 return 1;
3218
3219 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
3220
3221 free_space = btrfs_leaf_free_space(left);
3222 if (free_space < data_size) {
3223 ret = 1;
3224 goto out;
3225 }
3226
3227
3228 ret = btrfs_cow_block(trans, root, left,
3229 path->nodes[1], slot - 1, &left,
3230 BTRFS_NESTING_LEFT_COW);
3231 if (ret) {
3232
3233 if (ret == -ENOSPC)
3234 ret = 1;
3235 goto out;
3236 }
3237
3238 free_space = btrfs_leaf_free_space(left);
3239 if (free_space < data_size) {
3240 ret = 1;
3241 goto out;
3242 }
3243
3244 if (check_sibling_keys(left, right)) {
3245 ret = -EUCLEAN;
3246 goto out;
3247 }
3248 return __push_leaf_left(path, min_data_size,
3249 empty, left, free_space, right_nritems,
3250 max_slot);
3251out:
3252 btrfs_tree_unlock(left);
3253 free_extent_buffer(left);
3254 return ret;
3255}
3256
3257
3258
3259
3260
3261static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3262 struct btrfs_path *path,
3263 struct extent_buffer *l,
3264 struct extent_buffer *right,
3265 int slot, int mid, int nritems)
3266{
3267 struct btrfs_fs_info *fs_info = trans->fs_info;
3268 int data_copy_size;
3269 int rt_data_off;
3270 int i;
3271 struct btrfs_disk_key disk_key;
3272 struct btrfs_map_token token;
3273
3274 nritems = nritems - mid;
3275 btrfs_set_header_nritems(right, nritems);
3276 data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l);
3277
3278 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
3279 btrfs_item_nr_offset(mid),
3280 nritems * sizeof(struct btrfs_item));
3281
3282 copy_extent_buffer(right, l,
3283 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
3284 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
3285 leaf_data_end(l), data_copy_size);
3286
3287 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
3288
3289 btrfs_init_map_token(&token, right);
3290 for (i = 0; i < nritems; i++) {
3291 u32 ioff;
3292
3293 ioff = btrfs_token_item_offset(&token, i);
3294 btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
3295 }
3296
3297 btrfs_set_header_nritems(l, mid);
3298 btrfs_item_key(right, &disk_key, 0);
3299 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
3300
3301 btrfs_mark_buffer_dirty(right);
3302 btrfs_mark_buffer_dirty(l);
3303 BUG_ON(path->slots[0] != slot);
3304
3305 if (mid <= slot) {
3306 btrfs_tree_unlock(path->nodes[0]);
3307 free_extent_buffer(path->nodes[0]);
3308 path->nodes[0] = right;
3309 path->slots[0] -= mid;
3310 path->slots[1] += 1;
3311 } else {
3312 btrfs_tree_unlock(right);
3313 free_extent_buffer(right);
3314 }
3315
3316 BUG_ON(path->slots[0] < 0);
3317}
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3330 struct btrfs_root *root,
3331 struct btrfs_path *path,
3332 int data_size)
3333{
3334 int ret;
3335 int progress = 0;
3336 int slot;
3337 u32 nritems;
3338 int space_needed = data_size;
3339
3340 slot = path->slots[0];
3341 if (slot < btrfs_header_nritems(path->nodes[0]))
3342 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
3343
3344
3345
3346
3347
3348 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
3349 if (ret < 0)
3350 return ret;
3351
3352 if (ret == 0)
3353 progress++;
3354
3355 nritems = btrfs_header_nritems(path->nodes[0]);
3356
3357
3358
3359
3360 if (path->slots[0] == 0 || path->slots[0] == nritems)
3361 return 0;
3362
3363 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
3364 return 0;
3365
3366
3367 slot = path->slots[0];
3368 space_needed = data_size;
3369 if (slot > 0)
3370 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
3371 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
3372 if (ret < 0)
3373 return ret;
3374
3375 if (ret == 0)
3376 progress++;
3377
3378 if (progress)
3379 return 0;
3380 return 1;
3381}
3382
3383
3384
3385
3386
3387
3388
3389static noinline int split_leaf(struct btrfs_trans_handle *trans,
3390 struct btrfs_root *root,
3391 const struct btrfs_key *ins_key,
3392 struct btrfs_path *path, int data_size,
3393 int extend)
3394{
3395 struct btrfs_disk_key disk_key;
3396 struct extent_buffer *l;
3397 u32 nritems;
3398 int mid;
3399 int slot;
3400 struct extent_buffer *right;
3401 struct btrfs_fs_info *fs_info = root->fs_info;
3402 int ret = 0;
3403 int wret;
3404 int split;
3405 int num_doubles = 0;
3406 int tried_avoid_double = 0;
3407
3408 l = path->nodes[0];
3409 slot = path->slots[0];
3410 if (extend && data_size + btrfs_item_size(l, slot) +
3411 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
3412 return -EOVERFLOW;
3413
3414
3415 if (data_size && path->nodes[1]) {
3416 int space_needed = data_size;
3417
3418 if (slot < btrfs_header_nritems(l))
3419 space_needed -= btrfs_leaf_free_space(l);
3420
3421 wret = push_leaf_right(trans, root, path, space_needed,
3422 space_needed, 0, 0);
3423 if (wret < 0)
3424 return wret;
3425 if (wret) {
3426 space_needed = data_size;
3427 if (slot > 0)
3428 space_needed -= btrfs_leaf_free_space(l);
3429 wret = push_leaf_left(trans, root, path, space_needed,
3430 space_needed, 0, (u32)-1);
3431 if (wret < 0)
3432 return wret;
3433 }
3434 l = path->nodes[0];
3435
3436
3437 if (btrfs_leaf_free_space(l) >= data_size)
3438 return 0;
3439 }
3440
3441 if (!path->nodes[1]) {
3442 ret = insert_new_root(trans, root, path, 1);
3443 if (ret)
3444 return ret;
3445 }
3446again:
3447 split = 1;
3448 l = path->nodes[0];
3449 slot = path->slots[0];
3450 nritems = btrfs_header_nritems(l);
3451 mid = (nritems + 1) / 2;
3452
3453 if (mid <= slot) {
3454 if (nritems == 1 ||
3455 leaf_space_used(l, mid, nritems - mid) + data_size >
3456 BTRFS_LEAF_DATA_SIZE(fs_info)) {
3457 if (slot >= nritems) {
3458 split = 0;
3459 } else {
3460 mid = slot;
3461 if (mid != nritems &&
3462 leaf_space_used(l, mid, nritems - mid) +
3463 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
3464 if (data_size && !tried_avoid_double)
3465 goto push_for_double;
3466 split = 2;
3467 }
3468 }
3469 }
3470 } else {
3471 if (leaf_space_used(l, 0, mid) + data_size >
3472 BTRFS_LEAF_DATA_SIZE(fs_info)) {
3473 if (!extend && data_size && slot == 0) {
3474 split = 0;
3475 } else if ((extend || !data_size) && slot == 0) {
3476 mid = 1;
3477 } else {
3478 mid = slot;
3479 if (mid != nritems &&
3480 leaf_space_used(l, mid, nritems - mid) +
3481 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
3482 if (data_size && !tried_avoid_double)
3483 goto push_for_double;
3484 split = 2;
3485 }
3486 }
3487 }
3488 }
3489
3490 if (split == 0)
3491 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3492 else
3493 btrfs_item_key(l, &disk_key, mid);
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3504 &disk_key, 0, l->start, 0,
3505 num_doubles ? BTRFS_NESTING_NEW_ROOT :
3506 BTRFS_NESTING_SPLIT);
3507 if (IS_ERR(right))
3508 return PTR_ERR(right);
3509
3510 root_add_used(root, fs_info->nodesize);
3511
3512 if (split == 0) {
3513 if (mid <= slot) {
3514 btrfs_set_header_nritems(right, 0);
3515 insert_ptr(trans, path, &disk_key,
3516 right->start, path->slots[1] + 1, 1);
3517 btrfs_tree_unlock(path->nodes[0]);
3518 free_extent_buffer(path->nodes[0]);
3519 path->nodes[0] = right;
3520 path->slots[0] = 0;
3521 path->slots[1] += 1;
3522 } else {
3523 btrfs_set_header_nritems(right, 0);
3524 insert_ptr(trans, path, &disk_key,
3525 right->start, path->slots[1], 1);
3526 btrfs_tree_unlock(path->nodes[0]);
3527 free_extent_buffer(path->nodes[0]);
3528 path->nodes[0] = right;
3529 path->slots[0] = 0;
3530 if (path->slots[1] == 0)
3531 fixup_low_keys(path, &disk_key, 1);
3532 }
3533
3534
3535
3536
3537
3538 return ret;
3539 }
3540
3541 copy_for_split(trans, path, l, right, slot, mid, nritems);
3542
3543 if (split == 2) {
3544 BUG_ON(num_doubles != 0);
3545 num_doubles++;
3546 goto again;
3547 }
3548
3549 return 0;
3550
3551push_for_double:
3552 push_for_double_split(trans, root, path, data_size);
3553 tried_avoid_double = 1;
3554 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
3555 return 0;
3556 goto again;
3557}
3558
3559static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3560 struct btrfs_root *root,
3561 struct btrfs_path *path, int ins_len)
3562{
3563 struct btrfs_key key;
3564 struct extent_buffer *leaf;
3565 struct btrfs_file_extent_item *fi;
3566 u64 extent_len = 0;
3567 u32 item_size;
3568 int ret;
3569
3570 leaf = path->nodes[0];
3571 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3572
3573 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3574 key.type != BTRFS_EXTENT_CSUM_KEY);
3575
3576 if (btrfs_leaf_free_space(leaf) >= ins_len)
3577 return 0;
3578
3579 item_size = btrfs_item_size(leaf, path->slots[0]);
3580 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3581 fi = btrfs_item_ptr(leaf, path->slots[0],
3582 struct btrfs_file_extent_item);
3583 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3584 }
3585 btrfs_release_path(path);
3586
3587 path->keep_locks = 1;
3588 path->search_for_split = 1;
3589 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3590 path->search_for_split = 0;
3591 if (ret > 0)
3592 ret = -EAGAIN;
3593 if (ret < 0)
3594 goto err;
3595
3596 ret = -EAGAIN;
3597 leaf = path->nodes[0];
3598
3599 if (item_size != btrfs_item_size(leaf, path->slots[0]))
3600 goto err;
3601
3602
3603 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
3604 goto err;
3605
3606 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3607 fi = btrfs_item_ptr(leaf, path->slots[0],
3608 struct btrfs_file_extent_item);
3609 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3610 goto err;
3611 }
3612
3613 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3614 if (ret)
3615 goto err;
3616
3617 path->keep_locks = 0;
3618 btrfs_unlock_up_safe(path, 1);
3619 return 0;
3620err:
3621 path->keep_locks = 0;
3622 return ret;
3623}
3624
3625static noinline int split_item(struct btrfs_path *path,
3626 const struct btrfs_key *new_key,
3627 unsigned long split_offset)
3628{
3629 struct extent_buffer *leaf;
3630 int orig_slot, slot;
3631 char *buf;
3632 u32 nritems;
3633 u32 item_size;
3634 u32 orig_offset;
3635 struct btrfs_disk_key disk_key;
3636
3637 leaf = path->nodes[0];
3638 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
3639
3640 orig_slot = path->slots[0];
3641 orig_offset = btrfs_item_offset(leaf, path->slots[0]);
3642 item_size = btrfs_item_size(leaf, path->slots[0]);
3643
3644 buf = kmalloc(item_size, GFP_NOFS);
3645 if (!buf)
3646 return -ENOMEM;
3647
3648 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3649 path->slots[0]), item_size);
3650
3651 slot = path->slots[0] + 1;
3652 nritems = btrfs_header_nritems(leaf);
3653 if (slot != nritems) {
3654
3655 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3656 btrfs_item_nr_offset(slot),
3657 (nritems - slot) * sizeof(struct btrfs_item));
3658 }
3659
3660 btrfs_cpu_key_to_disk(&disk_key, new_key);
3661 btrfs_set_item_key(leaf, &disk_key, slot);
3662
3663 btrfs_set_item_offset(leaf, slot, orig_offset);
3664 btrfs_set_item_size(leaf, slot, item_size - split_offset);
3665
3666 btrfs_set_item_offset(leaf, orig_slot,
3667 orig_offset + item_size - split_offset);
3668 btrfs_set_item_size(leaf, orig_slot, split_offset);
3669
3670 btrfs_set_header_nritems(leaf, nritems + 1);
3671
3672
3673 write_extent_buffer(leaf, buf,
3674 btrfs_item_ptr_offset(leaf, path->slots[0]),
3675 split_offset);
3676
3677
3678 write_extent_buffer(leaf, buf + split_offset,
3679 btrfs_item_ptr_offset(leaf, slot),
3680 item_size - split_offset);
3681 btrfs_mark_buffer_dirty(leaf);
3682
3683 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
3684 kfree(buf);
3685 return 0;
3686}
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703int btrfs_split_item(struct btrfs_trans_handle *trans,
3704 struct btrfs_root *root,
3705 struct btrfs_path *path,
3706 const struct btrfs_key *new_key,
3707 unsigned long split_offset)
3708{
3709 int ret;
3710 ret = setup_leaf_for_split(trans, root, path,
3711 sizeof(struct btrfs_item));
3712 if (ret)
3713 return ret;
3714
3715 ret = split_item(path, new_key, split_offset);
3716 return ret;
3717}
3718
3719
3720
3721
3722
3723
3724
3725void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
3726{
3727 int slot;
3728 struct extent_buffer *leaf;
3729 u32 nritems;
3730 unsigned int data_end;
3731 unsigned int old_data_start;
3732 unsigned int old_size;
3733 unsigned int size_diff;
3734 int i;
3735 struct btrfs_map_token token;
3736
3737 leaf = path->nodes[0];
3738 slot = path->slots[0];
3739
3740 old_size = btrfs_item_size(leaf, slot);
3741 if (old_size == new_size)
3742 return;
3743
3744 nritems = btrfs_header_nritems(leaf);
3745 data_end = leaf_data_end(leaf);
3746
3747 old_data_start = btrfs_item_offset(leaf, slot);
3748
3749 size_diff = old_size - new_size;
3750
3751 BUG_ON(slot < 0);
3752 BUG_ON(slot >= nritems);
3753
3754
3755
3756
3757
3758 btrfs_init_map_token(&token, leaf);
3759 for (i = slot; i < nritems; i++) {
3760 u32 ioff;
3761
3762 ioff = btrfs_token_item_offset(&token, i);
3763 btrfs_set_token_item_offset(&token, i, ioff + size_diff);
3764 }
3765
3766
3767 if (from_end) {
3768 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3769 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3770 data_end, old_data_start + new_size - data_end);
3771 } else {
3772 struct btrfs_disk_key disk_key;
3773 u64 offset;
3774
3775 btrfs_item_key(leaf, &disk_key, slot);
3776
3777 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3778 unsigned long ptr;
3779 struct btrfs_file_extent_item *fi;
3780
3781 fi = btrfs_item_ptr(leaf, slot,
3782 struct btrfs_file_extent_item);
3783 fi = (struct btrfs_file_extent_item *)(
3784 (unsigned long)fi - size_diff);
3785
3786 if (btrfs_file_extent_type(leaf, fi) ==
3787 BTRFS_FILE_EXTENT_INLINE) {
3788 ptr = btrfs_item_ptr_offset(leaf, slot);
3789 memmove_extent_buffer(leaf, ptr,
3790 (unsigned long)fi,
3791 BTRFS_FILE_EXTENT_INLINE_DATA_START);
3792 }
3793 }
3794
3795 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3796 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3797 data_end, old_data_start - data_end);
3798
3799 offset = btrfs_disk_key_offset(&disk_key);
3800 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3801 btrfs_set_item_key(leaf, &disk_key, slot);
3802 if (slot == 0)
3803 fixup_low_keys(path, &disk_key, 1);
3804 }
3805
3806 btrfs_set_item_size(leaf, slot, new_size);
3807 btrfs_mark_buffer_dirty(leaf);
3808
3809 if (btrfs_leaf_free_space(leaf) < 0) {
3810 btrfs_print_leaf(leaf);
3811 BUG();
3812 }
3813}
3814
3815
3816
3817
3818void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
3819{
3820 int slot;
3821 struct extent_buffer *leaf;
3822 u32 nritems;
3823 unsigned int data_end;
3824 unsigned int old_data;
3825 unsigned int old_size;
3826 int i;
3827 struct btrfs_map_token token;
3828
3829 leaf = path->nodes[0];
3830
3831 nritems = btrfs_header_nritems(leaf);
3832 data_end = leaf_data_end(leaf);
3833
3834 if (btrfs_leaf_free_space(leaf) < data_size) {
3835 btrfs_print_leaf(leaf);
3836 BUG();
3837 }
3838 slot = path->slots[0];
3839 old_data = btrfs_item_data_end(leaf, slot);
3840
3841 BUG_ON(slot < 0);
3842 if (slot >= nritems) {
3843 btrfs_print_leaf(leaf);
3844 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
3845 slot, nritems);
3846 BUG();
3847 }
3848
3849
3850
3851
3852
3853 btrfs_init_map_token(&token, leaf);
3854 for (i = slot; i < nritems; i++) {
3855 u32 ioff;
3856
3857 ioff = btrfs_token_item_offset(&token, i);
3858 btrfs_set_token_item_offset(&token, i, ioff - data_size);
3859 }
3860
3861
3862 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3863 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
3864 data_end, old_data - data_end);
3865
3866 data_end = old_data;
3867 old_size = btrfs_item_size(leaf, slot);
3868 btrfs_set_item_size(leaf, slot, old_size + data_size);
3869 btrfs_mark_buffer_dirty(leaf);
3870
3871 if (btrfs_leaf_free_space(leaf) < 0) {
3872 btrfs_print_leaf(leaf);
3873 BUG();
3874 }
3875}
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3887 const struct btrfs_item_batch *batch)
3888{
3889 struct btrfs_fs_info *fs_info = root->fs_info;
3890 int i;
3891 u32 nritems;
3892 unsigned int data_end;
3893 struct btrfs_disk_key disk_key;
3894 struct extent_buffer *leaf;
3895 int slot;
3896 struct btrfs_map_token token;
3897 u32 total_size;
3898
3899
3900
3901
3902
3903
3904 if (path->slots[0] == 0) {
3905 btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
3906 fixup_low_keys(path, &disk_key, 1);
3907 }
3908 btrfs_unlock_up_safe(path, 1);
3909
3910 leaf = path->nodes[0];
3911 slot = path->slots[0];
3912
3913 nritems = btrfs_header_nritems(leaf);
3914 data_end = leaf_data_end(leaf);
3915 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
3916
3917 if (btrfs_leaf_free_space(leaf) < total_size) {
3918 btrfs_print_leaf(leaf);
3919 btrfs_crit(fs_info, "not enough freespace need %u have %d",
3920 total_size, btrfs_leaf_free_space(leaf));
3921 BUG();
3922 }
3923
3924 btrfs_init_map_token(&token, leaf);
3925 if (slot != nritems) {
3926 unsigned int old_data = btrfs_item_data_end(leaf, slot);
3927
3928 if (old_data < data_end) {
3929 btrfs_print_leaf(leaf);
3930 btrfs_crit(fs_info,
3931 "item at slot %d with data offset %u beyond data end of leaf %u",
3932 slot, old_data, data_end);
3933 BUG();
3934 }
3935
3936
3937
3938
3939 for (i = slot; i < nritems; i++) {
3940 u32 ioff;
3941
3942 ioff = btrfs_token_item_offset(&token, i);
3943 btrfs_set_token_item_offset(&token, i,
3944 ioff - batch->total_data_size);
3945 }
3946
3947 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
3948 btrfs_item_nr_offset(slot),
3949 (nritems - slot) * sizeof(struct btrfs_item));
3950
3951
3952 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3953 data_end - batch->total_data_size,
3954 BTRFS_LEAF_DATA_OFFSET + data_end,
3955 old_data - data_end);
3956 data_end = old_data;
3957 }
3958
3959
3960 for (i = 0; i < batch->nr; i++) {
3961 btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
3962 btrfs_set_item_key(leaf, &disk_key, slot + i);
3963 data_end -= batch->data_sizes[i];
3964 btrfs_set_token_item_offset(&token, slot + i, data_end);
3965 btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
3966 }
3967
3968 btrfs_set_header_nritems(leaf, nritems + batch->nr);
3969 btrfs_mark_buffer_dirty(leaf);
3970
3971 if (btrfs_leaf_free_space(leaf) < 0) {
3972 btrfs_print_leaf(leaf);
3973 BUG();
3974 }
3975}
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985void btrfs_setup_item_for_insert(struct btrfs_root *root,
3986 struct btrfs_path *path,
3987 const struct btrfs_key *key,
3988 u32 data_size)
3989{
3990 struct btrfs_item_batch batch;
3991
3992 batch.keys = key;
3993 batch.data_sizes = &data_size;
3994 batch.total_data_size = data_size;
3995 batch.nr = 1;
3996
3997 setup_items_for_insert(root, path, &batch);
3998}
3999
4000
4001
4002
4003
4004int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4005 struct btrfs_root *root,
4006 struct btrfs_path *path,
4007 const struct btrfs_item_batch *batch)
4008{
4009 int ret = 0;
4010 int slot;
4011 u32 total_size;
4012
4013 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
4014 ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);
4015 if (ret == 0)
4016 return -EEXIST;
4017 if (ret < 0)
4018 return ret;
4019
4020 slot = path->slots[0];
4021 BUG_ON(slot < 0);
4022
4023 setup_items_for_insert(root, path, batch);
4024 return 0;
4025}
4026
4027
4028
4029
4030
4031int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4032 const struct btrfs_key *cpu_key, void *data,
4033 u32 data_size)
4034{
4035 int ret = 0;
4036 struct btrfs_path *path;
4037 struct extent_buffer *leaf;
4038 unsigned long ptr;
4039
4040 path = btrfs_alloc_path();
4041 if (!path)
4042 return -ENOMEM;
4043 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
4044 if (!ret) {
4045 leaf = path->nodes[0];
4046 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4047 write_extent_buffer(leaf, data, ptr, data_size);
4048 btrfs_mark_buffer_dirty(leaf);
4049 }
4050 btrfs_free_path(path);
4051 return ret;
4052}
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4063 struct btrfs_root *root,
4064 struct btrfs_path *path,
4065 const struct btrfs_key *new_key)
4066{
4067 struct extent_buffer *leaf;
4068 int ret;
4069 u32 item_size;
4070
4071 leaf = path->nodes[0];
4072 item_size = btrfs_item_size(leaf, path->slots[0]);
4073 ret = setup_leaf_for_split(trans, root, path,
4074 item_size + sizeof(struct btrfs_item));
4075 if (ret)
4076 return ret;
4077
4078 path->slots[0]++;
4079 btrfs_setup_item_for_insert(root, path, new_key, item_size);
4080 leaf = path->nodes[0];
4081 memcpy_extent_buffer(leaf,
4082 btrfs_item_ptr_offset(leaf, path->slots[0]),
4083 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4084 item_size);
4085 return 0;
4086}
4087
4088
4089
4090
4091
4092
4093
4094static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4095 int level, int slot)
4096{
4097 struct extent_buffer *parent = path->nodes[level];
4098 u32 nritems;
4099 int ret;
4100
4101 nritems = btrfs_header_nritems(parent);
4102 if (slot != nritems - 1) {
4103 if (level) {
4104 ret = btrfs_tree_mod_log_insert_move(parent, slot,
4105 slot + 1, nritems - slot - 1);
4106 BUG_ON(ret < 0);
4107 }
4108 memmove_extent_buffer(parent,
4109 btrfs_node_key_ptr_offset(slot),
4110 btrfs_node_key_ptr_offset(slot + 1),
4111 sizeof(struct btrfs_key_ptr) *
4112 (nritems - slot - 1));
4113 } else if (level) {
4114 ret = btrfs_tree_mod_log_insert_key(parent, slot,
4115 BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
4116 BUG_ON(ret < 0);
4117 }
4118
4119 nritems--;
4120 btrfs_set_header_nritems(parent, nritems);
4121 if (nritems == 0 && parent == root->node) {
4122 BUG_ON(btrfs_header_level(root->node) != 1);
4123
4124 btrfs_set_header_level(root->node, 0);
4125 } else if (slot == 0) {
4126 struct btrfs_disk_key disk_key;
4127
4128 btrfs_node_key(parent, &disk_key, 0);
4129 fixup_low_keys(path, &disk_key, level + 1);
4130 }
4131 btrfs_mark_buffer_dirty(parent);
4132}
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4145 struct btrfs_root *root,
4146 struct btrfs_path *path,
4147 struct extent_buffer *leaf)
4148{
4149 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4150 del_ptr(root, path, 1, path->slots[1]);
4151
4152
4153
4154
4155
4156 btrfs_unlock_up_safe(path, 0);
4157
4158 root_sub_used(root, leaf->len);
4159
4160 atomic_inc(&leaf->refs);
4161 btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
4162 free_extent_buffer_stale(leaf);
4163}
4164
4165
4166
4167
4168int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4169 struct btrfs_path *path, int slot, int nr)
4170{
4171 struct btrfs_fs_info *fs_info = root->fs_info;
4172 struct extent_buffer *leaf;
4173 u32 last_off;
4174 u32 dsize = 0;
4175 int ret = 0;
4176 int wret;
4177 int i;
4178 u32 nritems;
4179
4180 leaf = path->nodes[0];
4181 last_off = btrfs_item_offset(leaf, slot + nr - 1);
4182
4183 for (i = 0; i < nr; i++)
4184 dsize += btrfs_item_size(leaf, slot + i);
4185
4186 nritems = btrfs_header_nritems(leaf);
4187
4188 if (slot + nr != nritems) {
4189 int data_end = leaf_data_end(leaf);
4190 struct btrfs_map_token token;
4191
4192 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4193 data_end + dsize,
4194 BTRFS_LEAF_DATA_OFFSET + data_end,
4195 last_off - data_end);
4196
4197 btrfs_init_map_token(&token, leaf);
4198 for (i = slot + nr; i < nritems; i++) {
4199 u32 ioff;
4200
4201 ioff = btrfs_token_item_offset(&token, i);
4202 btrfs_set_token_item_offset(&token, i, ioff + dsize);
4203 }
4204
4205 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
4206 btrfs_item_nr_offset(slot + nr),
4207 sizeof(struct btrfs_item) *
4208 (nritems - slot - nr));
4209 }
4210 btrfs_set_header_nritems(leaf, nritems - nr);
4211 nritems -= nr;
4212
4213
4214 if (nritems == 0) {
4215 if (leaf == root->node) {
4216 btrfs_set_header_level(leaf, 0);
4217 } else {
4218 btrfs_clean_tree_block(leaf);
4219 btrfs_del_leaf(trans, root, path, leaf);
4220 }
4221 } else {
4222 int used = leaf_space_used(leaf, 0, nritems);
4223 if (slot == 0) {
4224 struct btrfs_disk_key disk_key;
4225
4226 btrfs_item_key(leaf, &disk_key, 0);
4227 fixup_low_keys(path, &disk_key, 1);
4228 }
4229
4230
4231 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
4232
4233
4234
4235
4236 slot = path->slots[1];
4237 atomic_inc(&leaf->refs);
4238
4239 wret = push_leaf_left(trans, root, path, 1, 1,
4240 1, (u32)-1);
4241 if (wret < 0 && wret != -ENOSPC)
4242 ret = wret;
4243
4244 if (path->nodes[0] == leaf &&
4245 btrfs_header_nritems(leaf)) {
4246 wret = push_leaf_right(trans, root, path, 1,
4247 1, 1, 0);
4248 if (wret < 0 && wret != -ENOSPC)
4249 ret = wret;
4250 }
4251
4252 if (btrfs_header_nritems(leaf) == 0) {
4253 path->slots[1] = slot;
4254 btrfs_del_leaf(trans, root, path, leaf);
4255 free_extent_buffer(leaf);
4256 ret = 0;
4257 } else {
4258
4259
4260
4261
4262
4263 if (path->nodes[0] == leaf)
4264 btrfs_mark_buffer_dirty(leaf);
4265 free_extent_buffer(leaf);
4266 }
4267 } else {
4268 btrfs_mark_buffer_dirty(leaf);
4269 }
4270 }
4271 return ret;
4272}
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4283{
4284 struct btrfs_key key;
4285 struct btrfs_disk_key found_key;
4286 int ret;
4287
4288 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4289
4290 if (key.offset > 0) {
4291 key.offset--;
4292 } else if (key.type > 0) {
4293 key.type--;
4294 key.offset = (u64)-1;
4295 } else if (key.objectid > 0) {
4296 key.objectid--;
4297 key.type = (u8)-1;
4298 key.offset = (u64)-1;
4299 } else {
4300 return 1;
4301 }
4302
4303 btrfs_release_path(path);
4304 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4305 if (ret < 0)
4306 return ret;
4307 btrfs_item_key(path->nodes[0], &found_key, 0);
4308 ret = comp_keys(&found_key, &key);
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319 if (ret <= 0)
4320 return 0;
4321 return 1;
4322}
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
4344 struct btrfs_path *path,
4345 u64 min_trans)
4346{
4347 struct extent_buffer *cur;
4348 struct btrfs_key found_key;
4349 int slot;
4350 int sret;
4351 u32 nritems;
4352 int level;
4353 int ret = 1;
4354 int keep_locks = path->keep_locks;
4355
4356 path->keep_locks = 1;
4357again:
4358 cur = btrfs_read_lock_root_node(root);
4359 level = btrfs_header_level(cur);
4360 WARN_ON(path->nodes[level]);
4361 path->nodes[level] = cur;
4362 path->locks[level] = BTRFS_READ_LOCK;
4363
4364 if (btrfs_header_generation(cur) < min_trans) {
4365 ret = 1;
4366 goto out;
4367 }
4368 while (1) {
4369 nritems = btrfs_header_nritems(cur);
4370 level = btrfs_header_level(cur);
4371 sret = btrfs_bin_search(cur, min_key, &slot);
4372 if (sret < 0) {
4373 ret = sret;
4374 goto out;
4375 }
4376
4377
4378 if (level == path->lowest_level) {
4379 if (slot >= nritems)
4380 goto find_next_key;
4381 ret = 0;
4382 path->slots[level] = slot;
4383 btrfs_item_key_to_cpu(cur, &found_key, slot);
4384 goto out;
4385 }
4386 if (sret && slot > 0)
4387 slot--;
4388
4389
4390
4391
4392 while (slot < nritems) {
4393 u64 gen;
4394
4395 gen = btrfs_node_ptr_generation(cur, slot);
4396 if (gen < min_trans) {
4397 slot++;
4398 continue;
4399 }
4400 break;
4401 }
4402find_next_key:
4403
4404
4405
4406
4407 if (slot >= nritems) {
4408 path->slots[level] = slot;
4409 sret = btrfs_find_next_key(root, path, min_key, level,
4410 min_trans);
4411 if (sret == 0) {
4412 btrfs_release_path(path);
4413 goto again;
4414 } else {
4415 goto out;
4416 }
4417 }
4418
4419 btrfs_node_key_to_cpu(cur, &found_key, slot);
4420 path->slots[level] = slot;
4421 if (level == path->lowest_level) {
4422 ret = 0;
4423 goto out;
4424 }
4425 cur = btrfs_read_node_slot(cur, slot);
4426 if (IS_ERR(cur)) {
4427 ret = PTR_ERR(cur);
4428 goto out;
4429 }
4430
4431 btrfs_tree_read_lock(cur);
4432
4433 path->locks[level - 1] = BTRFS_READ_LOCK;
4434 path->nodes[level - 1] = cur;
4435 unlock_up(path, level, 1, 0, NULL);
4436 }
4437out:
4438 path->keep_locks = keep_locks;
4439 if (ret == 0) {
4440 btrfs_unlock_up_safe(path, path->lowest_level + 1);
4441 memcpy(min_key, &found_key, sizeof(found_key));
4442 }
4443 return ret;
4444}
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4458 struct btrfs_key *key, int level, u64 min_trans)
4459{
4460 int slot;
4461 struct extent_buffer *c;
4462
4463 WARN_ON(!path->keep_locks && !path->skip_locking);
4464 while (level < BTRFS_MAX_LEVEL) {
4465 if (!path->nodes[level])
4466 return 1;
4467
4468 slot = path->slots[level] + 1;
4469 c = path->nodes[level];
4470next:
4471 if (slot >= btrfs_header_nritems(c)) {
4472 int ret;
4473 int orig_lowest;
4474 struct btrfs_key cur_key;
4475 if (level + 1 >= BTRFS_MAX_LEVEL ||
4476 !path->nodes[level + 1])
4477 return 1;
4478
4479 if (path->locks[level + 1] || path->skip_locking) {
4480 level++;
4481 continue;
4482 }
4483
4484 slot = btrfs_header_nritems(c) - 1;
4485 if (level == 0)
4486 btrfs_item_key_to_cpu(c, &cur_key, slot);
4487 else
4488 btrfs_node_key_to_cpu(c, &cur_key, slot);
4489
4490 orig_lowest = path->lowest_level;
4491 btrfs_release_path(path);
4492 path->lowest_level = level;
4493 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4494 0, 0);
4495 path->lowest_level = orig_lowest;
4496 if (ret < 0)
4497 return ret;
4498
4499 c = path->nodes[level];
4500 slot = path->slots[level];
4501 if (ret == 0)
4502 slot++;
4503 goto next;
4504 }
4505
4506 if (level == 0)
4507 btrfs_item_key_to_cpu(c, key, slot);
4508 else {
4509 u64 gen = btrfs_node_ptr_generation(c, slot);
4510
4511 if (gen < min_trans) {
4512 slot++;
4513 goto next;
4514 }
4515 btrfs_node_key_to_cpu(c, key, slot);
4516 }
4517 return 0;
4518 }
4519 return 1;
4520}
4521
4522int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
4523 u64 time_seq)
4524{
4525 int slot;
4526 int level;
4527 struct extent_buffer *c;
4528 struct extent_buffer *next;
4529 struct btrfs_fs_info *fs_info = root->fs_info;
4530 struct btrfs_key key;
4531 bool need_commit_sem = false;
4532 u32 nritems;
4533 int ret;
4534 int i;
4535
4536 nritems = btrfs_header_nritems(path->nodes[0]);
4537 if (nritems == 0)
4538 return 1;
4539
4540 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4541again:
4542 level = 1;
4543 next = NULL;
4544 btrfs_release_path(path);
4545
4546 path->keep_locks = 1;
4547
4548 if (time_seq) {
4549 ret = btrfs_search_old_slot(root, &key, path, time_seq);
4550 } else {
4551 if (path->need_commit_sem) {
4552 path->need_commit_sem = 0;
4553 need_commit_sem = true;
4554 down_read(&fs_info->commit_root_sem);
4555 }
4556 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4557 }
4558 path->keep_locks = 0;
4559
4560 if (ret < 0)
4561 goto done;
4562
4563 nritems = btrfs_header_nritems(path->nodes[0]);
4564
4565
4566
4567
4568
4569
4570 if (nritems > 0 && path->slots[0] < nritems - 1) {
4571 if (ret == 0)
4572 path->slots[0]++;
4573 ret = 0;
4574 goto done;
4575 }
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
4591 ret = 0;
4592 goto done;
4593 }
4594
4595 while (level < BTRFS_MAX_LEVEL) {
4596 if (!path->nodes[level]) {
4597 ret = 1;
4598 goto done;
4599 }
4600
4601 slot = path->slots[level] + 1;
4602 c = path->nodes[level];
4603 if (slot >= btrfs_header_nritems(c)) {
4604 level++;
4605 if (level == BTRFS_MAX_LEVEL) {
4606 ret = 1;
4607 goto done;
4608 }
4609 continue;
4610 }
4611
4612
4613
4614
4615
4616
4617
4618 for (i = 0; i < level; i++) {
4619 if (path->locks[level]) {
4620 btrfs_tree_read_unlock(path->nodes[i]);
4621 path->locks[i] = 0;
4622 }
4623 free_extent_buffer(path->nodes[i]);
4624 path->nodes[i] = NULL;
4625 }
4626
4627 next = c;
4628 ret = read_block_for_search(root, path, &next, level,
4629 slot, &key);
4630 if (ret == -EAGAIN)
4631 goto again;
4632
4633 if (ret < 0) {
4634 btrfs_release_path(path);
4635 goto done;
4636 }
4637
4638 if (!path->skip_locking) {
4639 ret = btrfs_try_tree_read_lock(next);
4640 if (!ret && time_seq) {
4641
4642
4643
4644
4645
4646
4647
4648 free_extent_buffer(next);
4649 btrfs_release_path(path);
4650 cond_resched();
4651 goto again;
4652 }
4653 if (!ret)
4654 btrfs_tree_read_lock(next);
4655 }
4656 break;
4657 }
4658 path->slots[level] = slot;
4659 while (1) {
4660 level--;
4661 path->nodes[level] = next;
4662 path->slots[level] = 0;
4663 if (!path->skip_locking)
4664 path->locks[level] = BTRFS_READ_LOCK;
4665 if (!level)
4666 break;
4667
4668 ret = read_block_for_search(root, path, &next, level,
4669 0, &key);
4670 if (ret == -EAGAIN)
4671 goto again;
4672
4673 if (ret < 0) {
4674 btrfs_release_path(path);
4675 goto done;
4676 }
4677
4678 if (!path->skip_locking)
4679 btrfs_tree_read_lock(next);
4680 }
4681 ret = 0;
4682done:
4683 unlock_up(path, 0, 1, 0, NULL);
4684 if (need_commit_sem) {
4685 int ret2;
4686
4687 path->need_commit_sem = 1;
4688 ret2 = finish_need_commit_sem_search(path);
4689 up_read(&fs_info->commit_root_sem);
4690 if (ret2)
4691 ret = ret2;
4692 }
4693
4694 return ret;
4695}
4696
4697
4698
4699
4700
4701
4702
4703int btrfs_previous_item(struct btrfs_root *root,
4704 struct btrfs_path *path, u64 min_objectid,
4705 int type)
4706{
4707 struct btrfs_key found_key;
4708 struct extent_buffer *leaf;
4709 u32 nritems;
4710 int ret;
4711
4712 while (1) {
4713 if (path->slots[0] == 0) {
4714 ret = btrfs_prev_leaf(root, path);
4715 if (ret != 0)
4716 return ret;
4717 } else {
4718 path->slots[0]--;
4719 }
4720 leaf = path->nodes[0];
4721 nritems = btrfs_header_nritems(leaf);
4722 if (nritems == 0)
4723 return 1;
4724 if (path->slots[0] == nritems)
4725 path->slots[0]--;
4726
4727 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4728 if (found_key.objectid < min_objectid)
4729 break;
4730 if (found_key.type == type)
4731 return 0;
4732 if (found_key.objectid == min_objectid &&
4733 found_key.type < type)
4734 break;
4735 }
4736 return 1;
4737}
4738
4739
4740
4741
4742
4743
4744
4745int btrfs_previous_extent_item(struct btrfs_root *root,
4746 struct btrfs_path *path, u64 min_objectid)
4747{
4748 struct btrfs_key found_key;
4749 struct extent_buffer *leaf;
4750 u32 nritems;
4751 int ret;
4752
4753 while (1) {
4754 if (path->slots[0] == 0) {
4755 ret = btrfs_prev_leaf(root, path);
4756 if (ret != 0)
4757 return ret;
4758 } else {
4759 path->slots[0]--;
4760 }
4761 leaf = path->nodes[0];
4762 nritems = btrfs_header_nritems(leaf);
4763 if (nritems == 0)
4764 return 1;
4765 if (path->slots[0] == nritems)
4766 path->slots[0]--;
4767
4768 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4769 if (found_key.objectid < min_objectid)
4770 break;
4771 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4772 found_key.type == BTRFS_METADATA_ITEM_KEY)
4773 return 0;
4774 if (found_key.objectid == min_objectid &&
4775 found_key.type < BTRFS_EXTENT_ITEM_KEY)
4776 break;
4777 }
4778 return 1;
4779}
4780