1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/slab.h>
8#include <linux/rbtree.h>
9#include <linux/mm.h>
10#include "ctree.h"
11#include "disk-io.h"
12#include "transaction.h"
13#include "print-tree.h"
14#include "locking.h"
15#include "volumes.h"
16#include "qgroup.h"
17#include "tree-mod-log.h"
18
19static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
20 *root, struct btrfs_path *path, int level);
21static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
22 const struct btrfs_key *ins_key, struct btrfs_path *path,
23 int data_size, int extend);
24static int push_node_left(struct btrfs_trans_handle *trans,
25 struct extent_buffer *dst,
26 struct extent_buffer *src, int empty);
27static int balance_node_right(struct btrfs_trans_handle *trans,
28 struct extent_buffer *dst_buf,
29 struct extent_buffer *src_buf);
30static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
31 int level, int slot);
32
/*
 * Table of supported checksum algorithms, indexed by the on-disk
 * BTRFS_CSUM_TYPE_* value.  @size is the digest size in bytes, @name is
 * the algorithm name, and @driver optionally names the crypto API
 * implementation when it differs from @name (only blake2b needs this).
 */
static const struct btrfs_csums {
	u16		size;
	const char	name[10];
	const char	driver[12];
} btrfs_csums[] = {
	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
				     .driver = "blake2b-256" },
};
44
45int btrfs_super_csum_size(const struct btrfs_super_block *s)
46{
47 u16 t = btrfs_super_csum_type(s);
48
49
50
51 return btrfs_csums[t].size;
52}
53
54const char *btrfs_super_csum_name(u16 csum_type)
55{
56
57 return btrfs_csums[csum_type].name;
58}
59
60
61
62
63
64const char *btrfs_super_csum_driver(u16 csum_type)
65{
66
67 return btrfs_csums[csum_type].driver[0] ?
68 btrfs_csums[csum_type].driver :
69 btrfs_csums[csum_type].name;
70}
71
/* Return the number of checksum types known to this kernel. */
size_t __attribute_const__ btrfs_get_num_csums(void)
{
	return ARRAY_SIZE(btrfs_csums);
}
76
/* Allocate a zeroed btrfs_path from the dedicated slab cache (GFP_NOFS). */
struct btrfs_path *btrfs_alloc_path(void)
{
	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
}
81
82
83void btrfs_free_path(struct btrfs_path *p)
84{
85 if (!p)
86 return;
87 btrfs_release_path(p);
88 kmem_cache_free(btrfs_path_cachep, p);
89}
90
91
92
93
94
95
96
97noinline void btrfs_release_path(struct btrfs_path *p)
98{
99 int i;
100
101 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
102 p->slots[i] = 0;
103 if (!p->nodes[i])
104 continue;
105 if (p->locks[i]) {
106 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
107 p->locks[i] = 0;
108 }
109 free_extent_buffer(p->nodes[i]);
110 p->nodes[i] = NULL;
111 }
112}
113
114
115
116
117
118
119
120
121
122
123
/*
 * Safely get a reference-counted pointer to the current root node of
 * @root.  The root can be swapped out concurrently (COW replaces it),
 * so it is read under RCU and we only return once we successfully take
 * a reference on a buffer whose refcount has not already hit zero.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		rcu_read_lock();
		eb = rcu_dereference(root->node);

		/*
		 * The root node might have been freed because it was COWed
		 * and we may not see the new root yet, so do the
		 * inc_not_zero dance: if the refcount already dropped to
		 * zero, wait out the grace period and retry, which will
		 * observe the new root pointer.
		 */
		if (atomic_inc_not_zero(&eb->refs)) {
			rcu_read_unlock();
			break;
		}
		rcu_read_unlock();
		synchronize_rcu();
	}
	return eb;
}
147
148
149
150
151
152
/*
 * Put @root on fs_info->dirty_cowonly_roots so it gets written out at
 * transaction commit.  Only roots with TRACK_DIRTY set are eligible;
 * already-dirty roots are skipped cheaply before taking trans_lock.
 */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	/* Unlocked fast path: nothing to do if already dirty or untracked. */
	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
		return;

	spin_lock(&fs_info->trans_lock);
	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
		/* Want the extent tree to be the last on the list */
		if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
			list_move_tail(&root->dirty_list,
				       &fs_info->dirty_cowonly_roots);
		else
			list_move(&root->dirty_list,
				  &fs_info->dirty_cowonly_roots);
	}
	spin_unlock(&fs_info->trans_lock);
}
173
174
175
176
177
178
/*
 * Make a full copy of the root node @buf for a tree with objectid
 * @new_root_objectid (used e.g. by snapshot creation).  The newly
 * allocated, dirty copy is returned locked in *cow_ret; references on
 * all child blocks are bumped via btrfs_inc_ref().
 *
 * Returns 0 on success or a negative errno (the transaction is aborted
 * on ref-counting failure).
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *cow;
	int ret = 0;
	int level;
	struct btrfs_disk_key disk_key;

	/* Shareable roots must only be modified inside the running transaction. */
	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != fs_info->running_transaction->transid);
	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
				     &disk_key, level, buf->start, 0,
				     BTRFS_NESTING_NEW_ROOT);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* Clone the contents, then fix up the header of the copy. */
	copy_extent_buffer_full(cow, buf);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, new_root_objectid);

	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	/* Reloc trees take full backrefs, everything else implicit ones. */
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = btrfs_inc_ref(trans, root, cow, 1);
	else
		ret = btrfs_inc_ref(trans, root, cow, 0);
	if (ret) {
		btrfs_tree_unlock(cow);
		free_extent_buffer(cow);
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}
236
237
238
239
240int btrfs_block_can_be_shared(struct btrfs_root *root,
241 struct extent_buffer *buf)
242{
243
244
245
246
247
248 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
249 buf != root->node && buf != root->commit_root &&
250 (btrfs_header_generation(buf) <=
251 btrfs_root_last_snapshot(&root->root_item) ||
252 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
253 return 1;
254
255 return 0;
256}
257
/*
 * Fix up extent back references after COWing @buf into @cow.
 *
 * Depending on whether the old block was shared and which backref
 * scheme it uses (implicit vs FULL_BACKREF) we add or drop references
 * on the old block, the new block and the blocks they point to.
 *
 * *last_ref is set to 1 when this COW drops the last reference on @buf,
 * so the caller knows the old block can be freed.
 *
 * Returns 0 on success, negative errno on failure.
 */
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow,
				       int *last_ref)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 refs;
	u64 owner;
	u64 flags;
	u64 new_flags = 0;
	int ret;

	/*
	 * Backref update rules (as implemented below):
	 *
	 * - Blocks touched by tree relocation (root is the reloc tree) or
	 *   with a pre-mixed backref revision always use FULL_BACKREF.
	 *
	 * - A shared block still using implicit backrefs is converted to
	 *   FULL_BACKREF when it is COWed out of its owner tree, because
	 *   implicit backrefs can no longer identify its parents.
	 */

	if (btrfs_block_can_be_shared(root, buf)) {
		/* Possibly shared: look up the real refcount and flags. */
		ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
					       btrfs_header_level(buf), 1,
					       &refs, &flags);
		if (ret)
			return ret;
		if (refs == 0) {
			/* A live block with zero refs means corruption. */
			ret = -EROFS;
			btrfs_handle_fs_error(fs_info, ret, NULL);
			return ret;
		}
	} else {
		refs = 1;
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
		else
			flags = 0;
	}

	owner = btrfs_header_owner(buf);
	/* Blocks owned by the reloc tree must already be FULL_BACKREF. */
	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

	if (refs > 1) {
		if ((owner == root->root_key.objectid ||
		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			/* Convert the old block to FULL_BACKREF. */
			ret = btrfs_inc_ref(trans, root, buf, 1);
			if (ret)
				return ret;

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				/* Move the full backref from buf to cow. */
				ret = btrfs_dec_ref(trans, root, buf, 0);
				if (ret)
					return ret;
				ret = btrfs_inc_ref(trans, root, cow, 1);
				if (ret)
					return ret;
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {
			/* Shared block keeps its scheme; ref the new copy. */
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			if (ret)
				return ret;
		}
		if (new_flags != 0) {
			int level = btrfs_header_level(buf);

			/* Persist the FULL_BACKREF conversion on disk. */
			ret = btrfs_set_disk_extent_flags(trans, buf,
							  new_flags, level, 0);
			if (ret)
				return ret;
		}
	} else {
		/* Last reference: ref children via cow, drop them via buf. */
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			if (ret)
				return ret;
			ret = btrfs_dec_ref(trans, root, buf, 1);
			if (ret)
				return ret;
		}
		btrfs_clean_tree_block(buf);
		*last_ref = 1;
	}
	return 0;
}
366
/*
 * Allocate a tree block for COW while temporarily disabling the flush
 * of pending block groups for some trees.
 *
 * COWing a node of the extent, chunk, device or free space tree can
 * trigger allocation of a new chunk, and finishing a new chunk flushes
 * pending block groups, which in turn modifies exactly those trees —
 * while we hold a lock on one of their extent buffers.  Clearing
 * trans->can_flush_pending_bgs around the allocation avoids that
 * self-deadlock; the flag is restored unconditionally afterwards.
 */
static struct extent_buffer *alloc_tree_block_no_bg_flush(
					  struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  u64 parent_start,
					  const struct btrfs_disk_key *disk_key,
					  int level,
					  u64 hint,
					  u64 empty_size,
					  enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *ret;

	/* Only these trees are modified by pending block group creation. */
	if (root == fs_info->extent_root ||
	    root == fs_info->chunk_root ||
	    root == fs_info->dev_root ||
	    root == fs_info->free_space_root)
		trans->can_flush_pending_bgs = false;

	ret = btrfs_alloc_tree_block(trans, root, parent_start,
				     root->root_key.objectid, disk_key, level,
				     hint, empty_size, nest);
	trans->can_flush_pending_bgs = true;

	return ret;
}
409
410
411
412
413
414
415
416
417
418
419
420
421
/*
 * Do the dirty work of COWing a single block: allocate a copy, adjust
 * references, and splice the copy into the tree in place of @buf.
 *
 * @search_start: allocation hint for where to place the new block
 * @empty_size:   hint of extra free space wanted next to the new block
 *                (for callers that plan to COW more blocks nearby)
 *
 * The caller must hold the tree lock on @buf.  On success *cow_ret is
 * the new locked+dirty copy; @buf is unlocked if it was passed in as
 * *cow_ret, and its stale buffer reference is dropped.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size,
			     enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level, ret;
	int last_ref = 0;
	int unlock_orig = 0;
	u64 parent_start = 0;

	/* The caller wants the result in place of buf: unlock buf at the end. */
	if (*cow_ret == buf)
		unlock_orig = 1;

	btrfs_assert_tree_locked(buf);

	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != fs_info->running_transaction->transid);
	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != root->last_trans);

	level = btrfs_header_level(buf);

	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	/* Reloc tree blocks record their parent for full backrefs. */
	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
		parent_start = parent->start;

	cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
					   level, search_start, empty_size, nest);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* Clone the block and fix up the copy's header. */
	copy_extent_buffer_full(cow, buf);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, root->root_key.objectid);

	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);

	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
	if (ret) {
		btrfs_tree_unlock(cow);
		free_extent_buffer(cow);
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
		if (ret) {
			btrfs_tree_unlock(cow);
			free_extent_buffer(cow);
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
	}

	if (buf == root->node) {
		/* COWing the root: install cow as the new root node. */
		WARN_ON(parent && parent != buf);
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			parent_start = buf->start;

		atomic_inc(&cow->refs);
		ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
		BUG_ON(ret < 0);
		rcu_assign_pointer(root->node, cow);

		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		/* Point the parent slot at the new copy. */
		WARN_ON(trans->transid != btrfs_header_generation(parent));
		btrfs_tree_mod_log_insert_key(parent, parent_slot,
					      BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		if (last_ref) {
			ret = btrfs_tree_mod_log_free_eb(buf);
			if (ret) {
				btrfs_tree_unlock(cow);
				free_extent_buffer(cow);
				btrfs_abort_transaction(trans, ret);
				return ret;
			}
		}
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer_stale(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}
539
/*
 * Decide whether @buf must be COWed before it can be modified in this
 * transaction.  Returns 0 (no COW needed) only when all of these hold:
 *
 * 1) the block was created/changed in this transaction and has not been
 *    written out yet;
 * 2) it is not a RELOC-flagged block being modified outside the reloc
 *    tree;
 * 3) the root is not in forced-COW mode (BTRFS_ROOT_FORCE_COW is set
 *    while a snapshot of the root is being created, so shared blocks
 *    get COWed for metadata consistency).
 */
static inline int should_cow_block(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct extent_buffer *buf)
{
	if (btrfs_is_testing(root->fs_info))
		return 0;

	/* Ensure we can see the FORCE_COW bit set by snapshot creation. */
	smp_mb__before_atomic();

	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
	    !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
		return 0;
	return 1;
}
569
570
571
572
573
574
/*
 * COW a single block; the real work happens in __btrfs_cow_block().
 * This wrapper adds sanity checks against the running transaction and
 * skips the COW entirely when should_cow_block() says the block is
 * already writable in this transaction.
 */
noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret,
		    enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 search_start;
	int ret;

	if (test_bit(BTRFS_ROOT_DELETING, &root->state))
		btrfs_err(fs_info,
			"COW'ing blocks on a fs root that's being dropped");

	if (trans->transaction != fs_info->running_transaction)
		WARN(1, KERN_CRIT "trans %llu running %llu\n",
		       trans->transid,
		       fs_info->running_transaction->transid);

	if (trans->transid != fs_info->generation)
		WARN(1, KERN_CRIT "trans %llu running %llu\n",
		       trans->transid, fs_info->generation);

	if (!should_cow_block(trans, root, buf)) {
		/* No COW needed, but the transaction now has dirty blocks. */
		trans->dirty = true;
		*cow_ret = buf;
		return 0;
	}

	/* Hint the allocator towards the 1GiB-aligned region of buf. */
	search_start = buf->start & ~((u64)SZ_1G - 1);

	/*
	 * Trace qgroup-relevant subtree changes before the COW rewrites
	 * the parent pointer.
	 */
	btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
	ret = __btrfs_cow_block(trans, root, buf, parent,
				 parent_slot, cow_ret, search_start, 0, nest);

	trace_btrfs_cow_block(root, buf, *cow_ret);

	return ret;
}
ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
621
622
623
624
625
626static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
627{
628 if (blocknr < other && other - (blocknr + blocksize) < 32768)
629 return 1;
630 if (blocknr > other && blocknr - (other + blocksize) < 32768)
631 return 1;
632 return 0;
633}
634
#ifdef __LITTLE_ENDIAN

/*
 * Compare an on-disk key with a CPU key.  On little-endian machines the
 * disk byte order matches the CPU byte order, so the disk key can be
 * reinterpreted directly and no conversion is needed.
 */
static int comp_keys(const struct btrfs_disk_key *disk_key,
		     const struct btrfs_key *k2)
{
	const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;

	return btrfs_comp_cpu_keys(k1, k2);
}

#else

/*
 * Compare an on-disk key with a CPU key, converting the disk key to CPU
 * byte order first (big-endian fallback).
 */
static int comp_keys(const struct btrfs_disk_key *disk,
		     const struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	return btrfs_comp_cpu_keys(&k1, k2);
}
#endif
664
665
666
667
668int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
669{
670 if (k1->objectid > k2->objectid)
671 return 1;
672 if (k1->objectid < k2->objectid)
673 return -1;
674 if (k1->type > k2->type)
675 return 1;
676 if (k1->type < k2->type)
677 return -1;
678 if (k1->offset > k2->offset)
679 return 1;
680 if (k1->offset < k2->offset)
681 return -1;
682 return 0;
683}
684
685
686
687
688
689
/*
 * Used by defrag: walk the children of @parent from @start_slot onward
 * and COW (reallocate) any child whose block number is not already
 * close on disk to its neighbours, so disk order approaches key order.
 *
 * @last_ret/@progress carry the allocation hint and key position across
 * successive calls.  Returns 0 on success or a negative errno.
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *cur;
	u64 blocknr;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
	u32 blocksize;
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;

	WARN_ON(trans->transaction != fs_info->running_transaction);
	WARN_ON(trans->transid != fs_info->generation);

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = fs_info->nodesize;
	end_slot = parent_nritems - 1;

	if (parent_nritems <= 1)
		return 0;

	for (i = start_slot; i <= end_slot; i++) {
		int close = 1;

		/* Skip slots before the key we already progressed past. */
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, i);
		if (last_block == 0)
			last_block = blocknr;

		/* Leave the block alone if it's near either neighbour. */
		if (i > 0) {
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}

		cur = btrfs_read_node_slot(parent, i);
		if (IS_ERR(cur))
			return PTR_ERR(cur);
		if (search_start == 0)
			search_start = last_block;

		btrfs_tree_lock(cur);
		/* Reserve extra room (up to 16 blocks) for upcoming COWs. */
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize),
					BTRFS_NESTING_COW);
		if (err) {
			btrfs_tree_unlock(cur);
			free_extent_buffer(cur);
			break;
		}
		search_start = cur->start;
		last_block = cur->start;
		*last_ret = search_start;
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
	}
	return err;
}
769
770
771
772
773
774
775
776
777
778
779
/*
 * Binary search in an extent buffer full of fixed-size items.
 *
 * The items start at byte offset @p and are @item_size bytes apart;
 * there are @max of them.  On exact match *slot is the matching index
 * and 0 is returned; otherwise *slot is where @key should be inserted
 * (first item greater than @key) and 1 is returned.  -EINVAL signals a
 * corrupted nritems (max < 0 wrapped through int).
 */
static noinline int generic_bin_search(struct extent_buffer *eb,
				       unsigned long p, int item_size,
				       const struct btrfs_key *key,
				       int max, int *slot)
{
	int low = 0;
	int high = max;
	int ret;
	const int key_size = sizeof(struct btrfs_disk_key);

	if (low > high) {
		btrfs_err(eb->fs_info,
		 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
			  __func__, low, high, eb->start,
			  btrfs_header_owner(eb), btrfs_header_level(eb));
		return -EINVAL;
	}

	while (low < high) {
		unsigned long oip;
		unsigned long offset;
		struct btrfs_disk_key *tmp;
		struct btrfs_disk_key unaligned;
		int mid;

		mid = (low + high) / 2;
		offset = p + mid * item_size;
		oip = offset_in_page(offset);

		if (oip + key_size <= PAGE_SIZE) {
			/* Key fits in one page: compare it in place. */
			const unsigned long idx = get_eb_page_index(offset);
			char *kaddr = page_address(eb->pages[idx]);

			oip = get_eb_offset_in_page(eb, offset);
			tmp = (struct btrfs_disk_key *)(kaddr + oip);
		} else {
			/* Key straddles a page boundary: copy it out first. */
			read_extent_buffer(eb, &unaligned, offset, key_size);
			tmp = &unaligned;
		}

		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
			return 0;
		}
	}
	*slot = low;
	return 1;
}
834
835
836
837
838
839int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
840 int *slot)
841{
842 if (btrfs_header_level(eb) == 0)
843 return generic_bin_search(eb,
844 offsetof(struct btrfs_leaf, items),
845 sizeof(struct btrfs_item),
846 key, btrfs_header_nritems(eb),
847 slot);
848 else
849 return generic_bin_search(eb,
850 offsetof(struct btrfs_node, ptrs),
851 sizeof(struct btrfs_key_ptr),
852 key, btrfs_header_nritems(eb),
853 slot);
854}
855
/* Account @size bytes as used by @root (under the accounting lock). */
static void root_add_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) + size);
	spin_unlock(&root->accounting_lock);
}
863
/* Subtract @size bytes from @root's used accounting (under the lock). */
static void root_sub_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) - size);
	spin_unlock(&root->accounting_lock);
}
871
872
873
874
/*
 * Read the child block pointed to by @slot of node @parent, checking
 * its first key, generation and level against the parent's pointer.
 *
 * Returns a referenced (not locked) extent buffer, or an ERR_PTR:
 * -ENOENT for an out-of-range slot, -EIO if the block failed to read
 * uptodate, or the error from read_tree_block().
 */
struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
					   int slot)
{
	int level = btrfs_header_level(parent);
	struct extent_buffer *eb;
	struct btrfs_key first_key;

	if (slot < 0 || slot >= btrfs_header_nritems(parent))
		return ERR_PTR(-ENOENT);

	/* Only nodes have children; leaves must not get here. */
	BUG_ON(level == 0);

	btrfs_node_key_to_cpu(parent, &first_key, slot);
	eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
			     btrfs_header_owner(parent),
			     btrfs_node_ptr_generation(parent, slot),
			     level - 1, &first_key);
	if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
		free_extent_buffer(eb);
		eb = ERR_PTR(-EIO);
	}

	return eb;
}
899
900
901
902
903
904
/*
 * Node-level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 *
 * Expects the node at path->nodes[level] to be write locked and already
 * COWed in this transaction.  On return the path may have been updated
 * to point into a sibling instead of the original node.
 */
static noinline int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	ASSERT(level > 0);

	mid = path->nodes[level];

	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1) {
		parent = path->nodes[level + 1];
		pslot = path->slots[level + 1];
	}

	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
	if (!parent) {
		struct extent_buffer *child;

		if (btrfs_header_nritems(mid) != 1)
			return 0;

		/* promote the child to a root */
		child = btrfs_read_node_slot(mid, 0);
		if (IS_ERR(child)) {
			ret = PTR_ERR(child);
			btrfs_handle_fs_error(fs_info, ret, NULL);
			goto enospc;
		}

		btrfs_tree_lock(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
				      BTRFS_NESTING_COW);
		if (ret) {
			btrfs_tree_unlock(child);
			free_extent_buffer(child);
			goto enospc;
		}

		ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
		BUG_ON(ret < 0);
		rcu_assign_pointer(root->node, child);

		add_root_to_dirty_list(root);
		btrfs_tree_unlock(child);

		path->locks[level] = 0;
		path->nodes[level] = NULL;
		btrfs_clean_tree_block(mid);
		btrfs_tree_unlock(mid);

		/* once for the path */
		free_extent_buffer(mid);

		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		/* once for the root ptr */
		free_extent_buffer_stale(mid);
		return 0;
	}
	/* Full enough (over a quarter full): nothing to balance. */
	if (btrfs_header_nritems(mid) >
	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
		return 0;

	left = btrfs_read_node_slot(parent, pslot - 1);
	if (IS_ERR(left))
		left = NULL;

	if (left) {
		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left,
				       BTRFS_NESTING_LEFT_COW);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	right = btrfs_read_node_slot(parent, pslot + 1);
	if (IS_ERR(right))
		right = NULL;

	if (right) {
		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right,
				       BTRFS_NESTING_RIGHT_COW);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, left, mid, 1);
		if (wret < 0)
			ret = wret;
	}

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		wret = push_node_left(trans, mid, right, 1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
		if (btrfs_header_nritems(right) == 0) {
			/* right is now empty: remove it from the tree */
			btrfs_clean_tree_block(right);
			btrfs_tree_unlock(right);
			del_ptr(root, path, level + 1, pslot + 1);
			root_sub_used(root, right->len);
			btrfs_free_tree_block(trans, root, right, 0, 1);
			free_extent_buffer_stale(right);
			right = NULL;
		} else {
			/* update the parent key to match right's new first key */
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
			BUG_ON(ret < 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
		}
	}
	if (btrfs_header_nritems(mid) == 1) {
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right.
		 */
		if (!left) {
			ret = -EROFS;
			btrfs_handle_fs_error(fs_info, ret, NULL);
			goto enospc;
		}
		wret = balance_node_right(trans, mid, left);
		if (wret < 0) {
			ret = wret;
			goto enospc;
		}
		if (wret == 1) {
			wret = push_node_left(trans, left, mid, 1);
			if (wret < 0)
				ret = wret;
		}
		BUG_ON(wret == 1);
	}
	if (btrfs_header_nritems(mid) == 0) {
		/* mid is now empty: remove it from the tree */
		btrfs_clean_tree_block(mid);
		btrfs_tree_unlock(mid);
		del_ptr(root, path, level + 1, pslot);
		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		free_extent_buffer_stale(mid);
		mid = NULL;
	} else {
		/* update the parent key to match mid's new first key */
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		ret = btrfs_tree_mod_log_insert_key(parent, pslot,
				BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
		BUG_ON(ret < 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
	}

	/* update the path: our slot may have moved into the left sibling */
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			atomic_inc(&left->refs);
			/* left was locked after cow */
			path->nodes[level] = left;
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
			if (mid) {
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			}
		} else {
			orig_slot -= btrfs_header_nritems(left);
			path->slots[level] = orig_slot;
		}
	}
	/* double check we haven't messed things up */
	if (orig_ptr !=
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
		BUG();
enospc:
	if (right) {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	if (left) {
		if (path->nodes[level] != left)
			btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	return ret;
}
1127
1128
1129
1130
1131
/*
 * Node level balancing called when an insert is about to happen: try to
 * push pointers from path->nodes[level] into a sibling to make room.
 *
 * Returns 0 when something was pushed (and the path was updated to the
 * node that now holds the target slot), 1 when nothing could be pushed.
 */
static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];

	if (level == 0)
		return 1;

	mid = path->nodes[level];
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	if (level < BTRFS_MAX_LEVEL - 1) {
		parent = path->nodes[level + 1];
		pslot = path->slots[level + 1];
	}

	if (!parent)
		return 1;

	left = btrfs_read_node_slot(parent, pslot - 1);
	if (IS_ERR(left))
		left = NULL;

	/* first, try to make some room in the middle buffer */
	if (left) {
		u32 left_nr;

		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);

		left_nr = btrfs_header_nritems(left);
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
			/* left is (nearly) full: can't push there */
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left,
					      BTRFS_NESTING_LEFT_COW);
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, left, mid, 0);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			/* pushed left: fix up the parent key and the path */
			struct btrfs_disk_key disk_key;
			orig_slot += left_nr;
			btrfs_node_key(mid, &disk_key, 0);
			ret = btrfs_tree_mod_log_insert_key(parent, pslot,
					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
			BUG_ON(ret < 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				/* target slot moved into the left sibling */
				path->nodes[level] = left;
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				orig_slot -=
					btrfs_header_nritems(left);
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(left);
				free_extent_buffer(left);
			}
			return 0;
		}
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	right = btrfs_read_node_slot(parent, pslot + 1);
	if (IS_ERR(right))
		right = NULL;

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		u32 right_nr;

		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);

		right_nr = btrfs_header_nritems(right);
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
			/* right is (nearly) full: can't push there */
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right, BTRFS_NESTING_RIGHT_COW);
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, right, mid);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			/* pushed right: fix up the parent key and the path */
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
			BUG_ON(ret < 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				/* target slot moved into the right sibling */
				path->nodes[level] = right;
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
					btrfs_header_nritems(mid);
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				btrfs_tree_unlock(right);
				free_extent_buffer(right);
			}
			return 0;
		}
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 1;
}
1267
1268
1269
1270
1271
/*
 * Readahead children of the node at path->nodes[level] that are close
 * on disk (within 64KiB) to the child at @slot, to speed up an upcoming
 * sequential walk.  With READA_BACK and a non-zero @objectid the scan
 * stops at the first key with a different objectid.
 */
static void reada_for_search(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path,
			     int level, int slot, u64 objectid)
{
	struct extent_buffer *node;
	struct btrfs_disk_key disk_key;
	u32 nritems;
	u64 search;
	u64 target;
	u64 nread = 0;
	u64 nread_max;
	struct extent_buffer *eb;
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;

	/* Normal readahead only triggers for level 1 (leaf parents). */
	if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
		return;

	if (!path->nodes[level])
		return;

	node = path->nodes[level];

	/*
	 * READA_FORWARD_ALWAYS is more aggressive for leaf parents
	 * (128KiB) but limited to one node's worth above that.
	 */
	if (path->reada == READA_FORWARD_ALWAYS) {
		if (level > 1)
			nread_max = node->fs_info->nodesize;
		else
			nread_max = SZ_128K;
	} else {
		nread_max = SZ_64K;
	}

	search = btrfs_node_blockptr(node, slot);
	blocksize = fs_info->nodesize;
	/* If the target child is already cached, skip readahead entirely. */
	eb = find_extent_buffer(fs_info, search);
	if (eb) {
		free_extent_buffer(eb);
		return;
	}

	target = search;

	nritems = btrfs_header_nritems(node);
	nr = slot;

	while (1) {
		if (path->reada == READA_BACK) {
			if (nr == 0)
				break;
			nr--;
		} else if (path->reada == READA_FORWARD ||
			   path->reada == READA_FORWARD_ALWAYS) {
			nr++;
			if (nr >= nritems)
				break;
		}
		if (path->reada == READA_BACK && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
		search = btrfs_node_blockptr(node, nr);
		/* Only read ahead blocks within 64KiB of the target. */
		if (path->reada == READA_FORWARD_ALWAYS ||
		    (search <= target && target - search <= 65536) ||
		    (search > target && search - target <= 65536)) {
			btrfs_readahead_node_child(node, nr);
			nread += blocksize;
		}
		nscan++;
		if (nread > nread_max || nscan > 32)
			break;
	}
}
1351
1352static noinline void reada_for_balance(struct btrfs_path *path, int level)
1353{
1354 struct extent_buffer *parent;
1355 int slot;
1356 int nritems;
1357
1358 parent = path->nodes[level + 1];
1359 if (!parent)
1360 return;
1361
1362 nritems = btrfs_header_nritems(parent);
1363 slot = path->slots[level + 1];
1364
1365 if (slot > 0)
1366 btrfs_readahead_node_child(parent, slot - 1);
1367 if (slot + 1 < nritems)
1368 btrfs_readahead_node_child(parent, slot + 1);
1369}
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
/*
 * Release locks on the path from @level upwards, where it is safe to do so.
 * Locks at or above @lowest_unlock may be dropped.  A node whose slot is 0
 * (or, with path->keep_locks, whose slot is the last one) is "skipped":
 * its lock and all locks above it are kept, because a modification below
 * could propagate a key change into that node.
 *
 * If @write_lock_level is non-NULL it is lowered as write locks above
 * @min_write_lock_level are released, so the caller knows the highest level
 * still write locked.
 */
static noinline void unlock_up(struct btrfs_path *path, int level,
			       int lowest_unlock, int min_write_lock_level,
			       int *write_lock_level)
{
	int i;
	int skip_level = level;
	int no_skips = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
		/* Slot 0: a key change below would update this node's key. */
		if (!no_skips && path->slots[i] == 0) {
			skip_level = i + 1;
			continue;
		}
		/*
		 * With keep_locks set, also keep locks when positioned at
		 * the last slot of a node (callers rely on the lock for a
		 * stable view of the next key — NOTE(review): inferred from
		 * keep_locks users, confirm against callers).
		 */
		if (!no_skips && path->keep_locks) {
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
				skip_level = i + 1;
				continue;
			}
		}
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level) {
			btrfs_tree_unlock_rw(t, path->locks[i]);
			path->locks[i] = 0;
			/* Track the highest write lock still held. */
			if (write_lock_level &&
			    i > min_write_lock_level &&
			    i <= *write_lock_level) {
				*write_lock_level = i - 1;
			}
		}
	}
}
1427
1428
1429
1430
1431
1432
1433
1434
1435
/*
 * Helper for the search functions: read the child block at @slot of the
 * node *@eb_ret and return it in *@eb_ret.
 *
 * Returns 0 when the block was found in cache and is up to date, -EUCLEAN
 * if the cached block's first key does not match the parent's pointer (a
 * sign of corruption), -EIO on a failed read, or -EAGAIN (with the path
 * released) when a blocking read was started and the caller must restart
 * the search.
 */
static int
read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
		      struct extent_buffer **eb_ret, int level, int slot,
		      const struct btrfs_key *key)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 blocknr;
	u64 gen;
	struct extent_buffer *tmp;
	struct btrfs_key first_key;
	int ret;
	int parent_level;

	blocknr = btrfs_node_blockptr(*eb_ret, slot);
	gen = btrfs_node_ptr_generation(*eb_ret, slot);
	parent_level = btrfs_header_level(*eb_ret);
	btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);

	tmp = find_extent_buffer(fs_info, blocknr);
	if (tmp) {
		if (p->reada == READA_FORWARD_ALWAYS)
			reada_for_search(fs_info, p, level, slot, key->objectid);

		/* first we do an atomic uptodate check */
		if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
			/*
			 * Do an extra check on first_key: the cached buffer
			 * could be a stale one whose first key differs from
			 * what the parent pointer says, which we treat as
			 * corruption.
			 */
			if (btrfs_verify_level_key(tmp,
					parent_level - 1, &first_key, gen)) {
				free_extent_buffer(tmp);
				return -EUCLEAN;
			}
			*eb_ret = tmp;
			return 0;
		}

		/* now we're allowed to do a blocking uptodate check */
		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
		if (!ret) {
			*eb_ret = tmp;
			return 0;
		}
		free_extent_buffer(tmp);
		btrfs_release_path(p);
		return -EIO;
	}

	/*
	 * The block is not cached, so we are going to do a blocking read.
	 * Drop the locks above the parent first to reduce contention; we
	 * will return -EAGAIN and the caller restarts the search anyway.
	 */
	btrfs_unlock_up_safe(p, level + 1);

	if (p->reada != READA_NONE)
		reada_for_search(fs_info, p, level, slot, key->objectid);

	ret = -EAGAIN;
	tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
			      gen, parent_level - 1, &first_key);
	if (!IS_ERR(tmp)) {
		/*
		 * If the read above didn't mark this buffer up to date,
		 * it will never end up being up to date.  Set ret to EIO now
		 * and give up so that our caller doesn't loop forever
		 * on our EAGAINs.
		 */
		if (!extent_buffer_uptodate(tmp))
			ret = -EIO;
		free_extent_buffer(tmp);
	} else {
		ret = PTR_ERR(tmp);
	}

	btrfs_release_path(p);
	return ret;
}
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
/*
 * Helper for btrfs_search_slot: before descending into node @b at @level,
 * split it if an insert could overflow it (node nearly full) or balance it
 * if a deletion has left it too empty (less than half full).
 *
 * Returns -EAGAIN, with the path released, when a write lock on a higher
 * level is needed first; the caller must then restart the search.  Returns
 * 0 on success and a negative errno on failure.
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer *b, int level, int ins_len,
		       int *write_lock_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret = 0;

	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {

		/* Splitting needs the parent write locked as well. */
		if (*write_lock_level < level + 1) {
			*write_lock_level = level + 1;
			btrfs_release_path(p);
			return -EAGAIN;
		}

		reada_for_balance(p, level);
		ret = split_node(trans, root, p, level);

		b = p->nodes[level];
	} else if (ins_len < 0 && btrfs_header_nritems(b) <
		   BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {

		/* Balancing needs the parent write locked as well. */
		if (*write_lock_level < level + 1) {
			*write_lock_level = level + 1;
			btrfs_release_path(p);
			return -EAGAIN;
		}

		reada_for_balance(p, level);
		ret = balance_level(trans, root, p, level);
		if (ret)
			return ret;

		b = p->nodes[level];
		/* Balancing may have collapsed this level entirely. */
		if (!b) {
			btrfs_release_path(p);
			return -EAGAIN;
		}
		BUG_ON(btrfs_header_nritems(b) == 1);
	}
	return ret;
}
1573
1574int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
1575 u64 iobjectid, u64 ioff, u8 key_type,
1576 struct btrfs_key *found_key)
1577{
1578 int ret;
1579 struct btrfs_key key;
1580 struct extent_buffer *eb;
1581
1582 ASSERT(path);
1583 ASSERT(found_key);
1584
1585 key.type = key_type;
1586 key.objectid = iobjectid;
1587 key.offset = ioff;
1588
1589 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
1590 if (ret < 0)
1591 return ret;
1592
1593 eb = path->nodes[0];
1594 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1595 ret = btrfs_next_leaf(fs_root, path);
1596 if (ret)
1597 return ret;
1598 eb = path->nodes[0];
1599 }
1600
1601 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1602 if (found_key->type != key.type ||
1603 found_key->objectid != key.objectid)
1604 return 1;
1605
1606 return 0;
1607}
1608
/*
 * Get the root node (or commit root) of @root for a search and record it,
 * with the appropriate lock, in @p.
 *
 * A read lock is preferred; the root is only write locked when
 * @write_lock_level says the root's level must be write locked.  Returns
 * the (referenced) buffer, or an ERR_PTR on allocation failure.
 */
static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
							struct btrfs_path *p,
							int write_lock_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *b;
	int root_lock;
	int level = 0;

	/* We try very hard to do read locks on the root */
	root_lock = BTRFS_READ_LOCK;

	if (p->search_commit_root) {
		/*
		 * The commit roots are read only, so we always do read
		 * locks.  If the caller needs the commit_root_sem we take a
		 * private clone under the sem instead of holding it for the
		 * whole search.
		 */
		if (p->need_commit_sem) {
			down_read(&fs_info->commit_root_sem);
			b = btrfs_clone_extent_buffer(root->commit_root);
			up_read(&fs_info->commit_root_sem);
			if (!b)
				return ERR_PTR(-ENOMEM);

		} else {
			b = root->commit_root;
			atomic_inc(&b->refs);
		}
		level = btrfs_header_level(b);
		/*
		 * Ensure that all callers have set skip_locking when
		 * p->search_commit_root = 1.
		 */
		ASSERT(p->skip_locking == 1);

		goto out;
	}

	if (p->skip_locking) {
		b = btrfs_root_node(root);
		level = btrfs_header_level(b);
		goto out;
	}

	/*
	 * If write_lock_level is at the maximum, every level must be write
	 * locked anyway, so skip trying the read lock.
	 */
	if (write_lock_level < BTRFS_MAX_LEVEL) {
		/*
		 * We don't know the level of the root node until we actually
		 * have it read locked.
		 */
		b = btrfs_read_lock_root_node(root);
		level = btrfs_header_level(b);
		if (level > write_lock_level)
			goto out;

		/* Whoops, must trade for write lock */
		btrfs_tree_read_unlock(b);
		free_extent_buffer(b);
	}

	b = btrfs_lock_root_node(root);
	root_lock = BTRFS_WRITE_LOCK;

	/* The level might have changed, check again */
	level = btrfs_header_level(b);

out:
	p->nodes[level] = b;
	if (!p->skip_locking)
		p->locks[level] = root_lock;
	/*
	 * The caller is responsible for dropping the extra reference we took
	 * on b.
	 */
	return b;
}
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
/*
 * btrfs_search_slot - look for a key in a tree and perform necessary
 * modifications to preserve tree invariants.
 *
 * @trans:	Handle of transaction, used when modifying the tree
 * @root:	The root node of the tree
 * @key:	The key we are looking for
 * @p:		Holds all btree nodes along the search path
 * @ins_len:	Indicates purpose of search:
 *              >0  for inserts it's size of item inserted (*)
 *              <0  for deletions
 *               0  for plain searches, not modifying the tree
 *
 *              (*) If size of item inserted doesn't include
 *              sizeof(struct btrfs_item), then p->search_for_extension must
 *              be set.
 * @cow:	boolean, should CoW operations be performed.  Must always be
 *		1 when modifying the tree.
 *
 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
 *
 * If @key is found, 0 is returned and you can find the item in the leaf level
 * of the path (level 0).
 *
 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
 * points to the slot where it should be inserted.
 *
 * If an error is encountered while searching the tree a negative error number
 * is returned.
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *key, struct btrfs_path *p,
		      int ins_len, int cow)
{
	struct extent_buffer *b;
	int slot;
	int ret;
	int err;
	int level;
	int lowest_unlock = 1;
	/* everything at write_lock_level or lower must be write locked */
	int write_lock_level = 0;
	u8 lowest_level = 0;
	int min_write_lock_level;
	int prev_cmp;

	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len > 0);
	WARN_ON(p->nodes[0] != NULL);
	BUG_ON(!cow && ins_len);

	if (ins_len < 0) {
		lowest_unlock = 2;

		/*
		 * When we are removing items, we might have to go up to
		 * level two as we update tree pointers.  Make sure we keep
		 * write locks for those levels as well.
		 */
		write_lock_level = 2;
	} else if (ins_len > 0) {
		/*
		 * For inserting items, make sure we have a write lock on
		 * level 1 so we can update keys.
		 */
		write_lock_level = 1;
	}

	if (!cow)
		write_lock_level = -1;

	if (cow && (p->keep_locks || p->lowest_level))
		write_lock_level = BTRFS_MAX_LEVEL;

	min_write_lock_level = write_lock_level;

again:
	prev_cmp = -1;
	b = btrfs_search_slot_get_root(root, p, write_lock_level);
	if (IS_ERR(b)) {
		ret = PTR_ERR(b);
		goto done;
	}

	while (b) {
		int dec = 0;

		level = btrfs_header_level(b);

		if (cow) {
			bool last_level = (level == (BTRFS_MAX_LEVEL - 1));

			/*
			 * If we don't really need to cow this block then we
			 * don't want to take the write locks for it, so test
			 * that here first.
			 */
			if (!should_cow_block(trans, root, b)) {
				trans->dirty = true;
				goto cow_done;
			}

			/*
			 * Must have a write lock on this node and on its
			 * parent before we can cow it.
			 */
			if (level > write_lock_level ||
			    (level + 1 > write_lock_level &&
			    level + 1 < BTRFS_MAX_LEVEL &&
			    p->nodes[level + 1])) {
				write_lock_level = level + 1;
				btrfs_release_path(p);
				goto again;
			}

			if (last_level)
				err = btrfs_cow_block(trans, root, b, NULL, 0,
						      &b,
						      BTRFS_NESTING_COW);
			else
				err = btrfs_cow_block(trans, root, b,
						      p->nodes[level + 1],
						      p->slots[level + 1], &b,
						      BTRFS_NESTING_COW);
			if (err) {
				ret = err;
				goto done;
			}
		}
cow_done:
		p->nodes[level] = b;

		/*
		 * We have a lock on b and as long as we aren't changing
		 * the tree, there is no way for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 *
		 * If we're inserting or deleting (ins_len != 0), then we
		 * might be changing slot zero, which may require changing
		 * the parent.  So, we can't drop the lock until after we
		 * know which slot we're operating on.
		 */
		if (!ins_len && !p->keep_locks) {
			int u = level + 1;

			if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
				btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
				p->locks[u] = 0;
			}
		}

		/*
		 * If btrfs_bin_search returned an exact match (prev_cmp == 0)
		 * we can safely assume the target key will always be in slot 0
		 * on lower levels, because a btrfs_key_ptr entry always
		 * points to the lowest key in its child node.  So we can skip
		 * searching lower levels.
		 */
		if (prev_cmp == 0) {
			slot = 0;
			ret = 0;
		} else {
			ret = btrfs_bin_search(b, key, &slot);
			prev_cmp = ret;
			if (ret < 0)
				goto done;
		}

		if (level == 0) {
			p->slots[level] = slot;
			/*
			 * Item key already exists.  In this case, if we are
			 * allowed to insert the item (for example, in the
			 * dir_item case, key collision is allowed), it will
			 * be merged with the original item.  Only the item
			 * size grows, no new btrfs item is added.  If
			 * search_for_extension is not set, ins_len already
			 * accounts for sizeof(struct btrfs_item); deduct it
			 * here so the leaf space check is correct.
			 */
			if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
				ASSERT(ins_len >= sizeof(struct btrfs_item));
				ins_len -= sizeof(struct btrfs_item);
			}
			if (ins_len > 0 &&
			    btrfs_leaf_free_space(b) < ins_len) {
				if (write_lock_level < 1) {
					write_lock_level = 1;
					btrfs_release_path(p);
					goto again;
				}

				err = split_leaf(trans, root, key,
						 p, ins_len, ret == 0);

				BUG_ON(err > 0);
				if (err) {
					ret = err;
					goto done;
				}
			}
			if (!p->search_for_split)
				unlock_up(p, level, lowest_unlock,
					  min_write_lock_level, NULL);
			goto done;
		}
		if (ret && slot > 0) {
			dec = 1;
			slot--;
		}
		p->slots[level] = slot;
		err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
					     &write_lock_level);
		if (err == -EAGAIN)
			goto again;
		if (err) {
			ret = err;
			goto done;
		}
		b = p->nodes[level];
		slot = p->slots[level];

		/*
		 * Slot 0 is special: if we change the key we have to update
		 * the parent pointer, which means we must have a write lock
		 * on the parent.
		 */
		if (slot == 0 && ins_len && write_lock_level < level + 1) {
			write_lock_level = level + 1;
			btrfs_release_path(p);
			goto again;
		}

		unlock_up(p, level, lowest_unlock, min_write_lock_level,
			  &write_lock_level);

		if (level == lowest_level) {
			if (dec)
				p->slots[level]++;
			goto done;
		}

		err = read_block_for_search(root, p, &b, level, slot, key);
		if (err == -EAGAIN)
			goto again;
		if (err) {
			ret = err;
			goto done;
		}

		if (!p->skip_locking) {
			level = btrfs_header_level(b);
			if (level <= write_lock_level) {
				btrfs_tree_lock(b);
				p->locks[level] = BTRFS_WRITE_LOCK;
			} else {
				btrfs_tree_read_lock(b);
				p->locks[level] = BTRFS_READ_LOCK;
			}
			p->nodes[level] = b;
		}
	}
	ret = 1;
done:
	if (ret < 0 && !p->skip_release_on_error)
		btrfs_release_path(p);
	return ret;
}
ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
/*
 * Like btrfs_search_slot, this looks for a key in the given tree.  It uses
 * the extent buffer cache and the tree modification log to search the tree
 * as it looked at @time_seq.  This is read only: it takes no transaction
 * handle and never modifies the tree.
 *
 * Naturally, there is no support for insert, delete or cow operations.
 *
 * The resulting path and return value are set up as if we had called
 * btrfs_search_slot at that point in time with ins_len and cow both 0.
 */
int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
			  struct btrfs_path *p, u64 time_seq)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *b;
	int slot;
	int ret;
	int err;
	int level;
	int lowest_unlock = 1;
	u8 lowest_level = 0;

	lowest_level = p->lowest_level;
	WARN_ON(p->nodes[0] != NULL);

	if (p->search_commit_root) {
		BUG_ON(time_seq);
		return btrfs_search_slot(NULL, root, key, p, 0, 0);
	}

again:
	b = btrfs_get_old_root(root, time_seq);
	if (!b) {
		ret = -EIO;
		goto done;
	}
	level = btrfs_header_level(b);
	p->locks[level] = BTRFS_READ_LOCK;

	while (b) {
		int dec = 0;

		level = btrfs_header_level(b);
		p->nodes[level] = b;

		/*
		 * We have a lock on b and since we aren't changing the tree
		 * there is no way for the items in b to change.  It is safe
		 * to drop the lock on our parent before the binary search
		 * on b.
		 */
		btrfs_unlock_up_safe(p, level + 1);

		ret = btrfs_bin_search(b, key, &slot);
		if (ret < 0)
			goto done;

		if (level == 0) {
			p->slots[level] = slot;
			unlock_up(p, level, lowest_unlock, 0, NULL);
			goto done;
		}

		if (ret && slot > 0) {
			dec = 1;
			slot--;
		}
		p->slots[level] = slot;
		unlock_up(p, level, lowest_unlock, 0, NULL);

		if (level == lowest_level) {
			if (dec)
				p->slots[level]++;
			goto done;
		}

		err = read_block_for_search(root, p, &b, level, slot, key);
		if (err == -EAGAIN)
			goto again;
		if (err) {
			ret = err;
			goto done;
		}

		level = btrfs_header_level(b);
		btrfs_tree_read_lock(b);
		/* Rewind the buffer to how it looked at time_seq. */
		b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
		if (!b) {
			ret = -ENOMEM;
			goto done;
		}
		p->locks[level] = BTRFS_READ_LOCK;
		p->nodes[level] = b;
	}
	ret = 1;
done:
	if (ret < 0)
		btrfs_release_path(p);

	return ret;
}
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
/*
 * Helper to use instead of search slot if no exact match is needed but
 * instead the next or previous item should be returned.
 * When @find_higher is true, the next higher item is returned, the next
 * lower otherwise.
 * When @return_any and @find_higher are both true, and no higher item is
 * found, return the next lower instead.
 * When @return_any is true and @find_higher is false, and no lower item is
 * found, return the next higher instead.
 * It returns 0 if any item is found, 1 if none is found (tree empty), and
 * < 0 on error.
 */
int btrfs_search_slot_for_read(struct btrfs_root *root,
			       const struct btrfs_key *key,
			       struct btrfs_path *p, int find_higher,
			       int return_any)
{
	int ret;
	struct extent_buffer *leaf;

again:
	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
	if (ret <= 0)
		return ret;
	/*
	 * A return value of 1 means the path is at the position where the
	 * item should be inserted.  Normally this is the next bigger item,
	 * but in case the previous item is the last in a leaf, path points
	 * to the first free slot in the previous leaf, i.e. at an invalid
	 * item.
	 */
	leaf = p->nodes[0];

	if (find_higher) {
		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, p);
			if (ret <= 0)
				return ret;
			if (!return_any)
				return 1;
			/*
			 * No higher item found, return the next lower
			 * instead.
			 */
			return_any = 0;
			find_higher = 0;
			btrfs_release_path(p);
			goto again;
		}
	} else {
		if (p->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, p);
			if (ret < 0)
				return ret;
			if (!ret) {
				leaf = p->nodes[0];
				if (p->slots[0] == btrfs_header_nritems(leaf))
					p->slots[0]--;
				return 0;
			}
			if (!return_any)
				return 1;
			/*
			 * No lower item found, return the next higher
			 * instead.
			 */
			return_any = 0;
			find_higher = 1;
			btrfs_release_path(p);
			goto again;
		} else {
			--p->slots[0];
		}
	}
	return 0;
}
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158static void fixup_low_keys(struct btrfs_path *path,
2159 struct btrfs_disk_key *key, int level)
2160{
2161 int i;
2162 struct extent_buffer *t;
2163 int ret;
2164
2165 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2166 int tslot = path->slots[i];
2167
2168 if (!path->nodes[i])
2169 break;
2170 t = path->nodes[i];
2171 ret = btrfs_tree_mod_log_insert_key(t, tslot,
2172 BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
2173 BUG_ON(ret < 0);
2174 btrfs_set_node_key(t, key, tslot);
2175 btrfs_mark_buffer_dirty(path->nodes[i]);
2176 if (tslot != 0)
2177 break;
2178 }
2179}
2180
2181
2182
2183
2184
2185
2186
/*
 * Update the key of the item the path points at, verifying first that the
 * new key still sorts strictly between the previous and next keys in the
 * leaf (BUG() on violation, after dumping the leaf).  If slot 0 changes,
 * the new first key is propagated up the tree.
 */
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path,
			     const struct btrfs_key *new_key)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *eb;
	int slot;

	eb = path->nodes[0];
	slot = path->slots[0];
	/* The previous key must compare strictly below the new key. */
	if (slot > 0) {
		btrfs_item_key(eb, &disk_key, slot - 1);
		if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
			btrfs_crit(fs_info,
		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
				   slot, btrfs_disk_key_objectid(&disk_key),
				   btrfs_disk_key_type(&disk_key),
				   btrfs_disk_key_offset(&disk_key),
				   new_key->objectid, new_key->type,
				   new_key->offset);
			btrfs_print_leaf(eb);
			BUG();
		}
	}
	/* The next key must compare strictly above the new key. */
	if (slot < btrfs_header_nritems(eb) - 1) {
		btrfs_item_key(eb, &disk_key, slot + 1);
		if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
			btrfs_crit(fs_info,
		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
				   slot, btrfs_disk_key_objectid(&disk_key),
				   btrfs_disk_key_type(&disk_key),
				   btrfs_disk_key_offset(&disk_key),
				   new_key->objectid, new_key->type,
				   new_key->offset);
			btrfs_print_leaf(eb);
			BUG();
		}
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(eb, &disk_key, slot);
	btrfs_mark_buffer_dirty(eb);
	/* Slot 0 changed: the parent's key for this leaf must follow. */
	if (slot == 0)
		fixup_low_keys(path, &disk_key, 1);
}
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253static bool check_sibling_keys(struct extent_buffer *left,
2254 struct extent_buffer *right)
2255{
2256 struct btrfs_key left_last;
2257 struct btrfs_key right_first;
2258 int level = btrfs_header_level(left);
2259 int nr_left = btrfs_header_nritems(left);
2260 int nr_right = btrfs_header_nritems(right);
2261
2262
2263 if (!nr_left || !nr_right)
2264 return false;
2265
2266 if (level) {
2267 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
2268 btrfs_node_key_to_cpu(right, &right_first, 0);
2269 } else {
2270 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
2271 btrfs_item_key_to_cpu(right, &right_first, 0);
2272 }
2273
2274 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
2275 btrfs_crit(left->fs_info,
2276"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
2277 left_last.objectid, left_last.type,
2278 left_last.offset, right_first.objectid,
2279 right_first.type, right_first.offset);
2280 return true;
2281 }
2282 return false;
2283}
2284
2285
2286
2287
2288
2289
2290
2291
/*
 * Try to push key pointers from @src into the node @dst, which is left of
 * @src in the tree.  When @empty is set we try to empty @src completely;
 * otherwise at least 8 pointers are left behind to keep the node usable.
 *
 * Returns 0 if some pointers were pushed, 1 if there was no room (or not
 * enough items to bother), and < 0 on error (the transaction is aborted).
 */
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src, int empty)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int push_items = 0;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
	/* Both nodes must already be cowed in this transaction. */
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	if (!empty && src_nritems <= 8)
		return 1;

	if (push_items <= 0)
		return 1;

	if (empty) {
		push_items = min(src_nritems, push_items);
		if (push_items < src_nritems) {
			/*
			 * Leave at least 8 pointers in the node if we aren't
			 * going to empty it.
			 */
			if (src_nritems - push_items < 8) {
				if (push_items <= 8)
					return 1;
				push_items -= 8;
			}
		}
	} else
		push_items = min(src_nritems - 8, push_items);

	/* Refuse to create a block with broken key order. */
	if (check_sibling_keys(dst, src)) {
		ret = -EUCLEAN;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
			   push_items * sizeof(struct btrfs_key_ptr));

	if (push_items < src_nritems) {
		/*
		 * The removal of these pointers from src was already fully
		 * logged by btrfs_tree_mod_log_eb_copy() above, so no extra
		 * move record is needed here.
		 */
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
/*
 * Try to push key pointers from @src into @dst, which is right of @src in
 * the tree.  At most half (plus one) of @src's pointers are moved.
 *
 * Returns 0 if some pointers were pushed, 1 if there was no room or @src
 * is too small to bother, and < 0 on error (the transaction is aborted).
 */
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	/* Both nodes must already be cowed in this transaction. */
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
	if (push_items <= 0)
		return 1;

	if (src_nritems < 4)
		return 1;

	max_push = src_nritems / 2 + 1;
	/* Don't try to empty the node. */
	if (max_push >= src_nritems)
		return 1;

	if (max_push < push_items)
		push_items = max_push;

	/* Refuse to create a block with broken key order. */
	if (check_sibling_keys(src, dst)) {
		ret = -EUCLEAN;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	/* Make room at the front of dst for the incoming pointers. */
	ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
	BUG_ON(ret < 0);
	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
			      btrfs_node_key_ptr_offset(0),
			      (dst_nritems) *
			      sizeof(struct btrfs_key_ptr));

	ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
					 push_items);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	/* Copy the tail of src into the front of dst. */
	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
			   push_items * sizeof(struct btrfs_key_ptr));

	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);

	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}
2435
2436
2437
2438
2439
2440
2441
2442
/*
 * Helper function to insert a new root level in the tree.
 * A new node is allocated, and a single pointer item is inserted that
 * points to the existing root.
 *
 * Returns zero on success or < 0 on failure.
 */
static noinline int insert_new_root(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path, int level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 lower_gen;
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct extent_buffer *old;
	struct btrfs_disk_key lower_key;
	int ret;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
					 root->node->start, 0,
					 BTRFS_NESTING_NEW_ROOT);
	if (IS_ERR(c))
		return PTR_ERR(c);

	root_add_used(root, fs_info->nodesize);

	/* The new root has a single pointer: the old root node. */
	btrfs_set_header_nritems(c, 1);
	btrfs_set_node_key(c, &lower_key, 0);
	btrfs_set_node_blockptr(c, 0, lower->start);
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen != trans->transid);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);

	btrfs_mark_buffer_dirty(c);

	old = root->node;
	ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
	BUG_ON(ret < 0);
	rcu_assign_pointer(root->node, c);

	/* The lower node's reference is now owned by the new root node. */
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
	/* Extra ref for the path. */
	atomic_inc(&c->refs);
	path->nodes[level] = c;
	path->locks[level] = BTRFS_WRITE_LOCK;
	path->slots[level] = 0;
	return 0;
}
2497
2498
2499
2500
2501
2502
2503
2504
/*
 * Insert a new pointer into a node.  The node must already have enough
 * room for it (enforced with BUG_ON).
 *
 * @slot and @level indicate where the key should go, and @bytenr is the
 * block the key points to.
 */
static void insert_ptr(struct btrfs_trans_handle *trans,
		       struct btrfs_path *path,
		       struct btrfs_disk_key *key, u64 bytenr,
		       int slot, int level)
{
	struct extent_buffer *lower;
	int nritems;
	int ret;

	BUG_ON(!path->nodes[level]);
	btrfs_assert_tree_locked(path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
	BUG_ON(slot > nritems);
	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
	/* Shift existing pointers right to make room for the new one. */
	if (slot != nritems) {
		if (level) {
			ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
					slot, nritems - slot);
			BUG_ON(ret < 0);
		}
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
	}
	if (level) {
		ret = btrfs_tree_mod_log_insert_key(lower, slot,
					    BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
		BUG_ON(ret < 0);
	}
	btrfs_set_node_key(lower, key, slot);
	btrfs_set_node_blockptr(lower, slot, bytenr);
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
}
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
/*
 * Split the node at the specified level in @path in two.  The path is
 * corrected to point to the appropriate node after the split.
 *
 * Before splitting, this tries to make some room in the node by pushing
 * pointers left and right; if that frees enough it returns right away.
 *
 * Returns 0 on success and < 0 on failure.
 */
static noinline int split_node(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path, int level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
	int mid;
	int ret;
	u32 c_nritems;

	c = path->nodes[level];
	WARN_ON(btrfs_header_generation(c) != trans->transid);
	if (c == root->node) {
		/*
		 * Trying to split the root: make a new one.
		 *
		 * Tree mod log: we don't log the removal of the old root in
		 * insert_new_root, because that root buffer is kept as a
		 * normal node.  We log removal of half of its elements
		 * below via btrfs_tree_mod_log_eb_copy().  We hold a tree
		 * lock on the buffer, which is why we cannot race with
		 * other tree_mod_log users.
		 */
		ret = insert_new_root(trans, root, path, level + 1);
		if (ret)
			return ret;
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
		    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
			return 0;
		if (ret < 0)
			return ret;
	}

	c_nritems = btrfs_header_nritems(c);
	mid = (c_nritems + 1) / 2;
	btrfs_node_key(c, &disk_key, mid);

	split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
					     c->start, 0, BTRFS_NESTING_SPLIT);
	if (IS_ERR(split))
		return PTR_ERR(split);

	root_add_used(root, fs_info->nodesize);
	ASSERT(btrfs_header_level(c) == level);

	ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	/* Move the upper half of c's pointers into the new node. */
	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);

	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	insert_ptr(trans, path, &disk_key, split->start,
		   path->slots[level + 1] + 1, level + 1);

	/* Fix the path to point at whichever half holds our slot. */
	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
		btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = split;
		path->slots[level + 1] += 1;
	} else {
		btrfs_tree_unlock(split);
		free_extent_buffer(split);
	}
	return 0;
}
2633
2634
2635
2636
2637
2638
2639static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2640{
2641 struct btrfs_item *start_item;
2642 struct btrfs_item *end_item;
2643 int data_len;
2644 int nritems = btrfs_header_nritems(l);
2645 int end = min(nritems, start + nr) - 1;
2646
2647 if (!nr)
2648 return 0;
2649 start_item = btrfs_item_nr(start);
2650 end_item = btrfs_item_nr(end);
2651 data_len = btrfs_item_offset(l, start_item) +
2652 btrfs_item_size(l, start_item);
2653 data_len = data_len - btrfs_item_offset(l, end_item);
2654 data_len += sizeof(struct btrfs_item) * nr;
2655 WARN_ON(data_len < 0);
2656 return data_len;
2657}
2658
2659
2660
2661
2662
2663
2664noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
2665{
2666 struct btrfs_fs_info *fs_info = leaf->fs_info;
2667 int nritems = btrfs_header_nritems(leaf);
2668 int ret;
2669
2670 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
2671 if (ret < 0) {
2672 btrfs_crit(fs_info,
2673 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
2674 ret,
2675 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
2676 leaf_space_used(leaf, 0, nritems), nritems);
2677 }
2678 return ret;
2679}
2680
2681
2682
2683
2684
/*
 * Push some data from the path's leaf into @right (its right sibling),
 * trying to free up at least @data_size bytes.  Only slots >= @min_slot
 * may be pushed; with @empty set the leaf may be emptied completely.
 *
 * @right must be write locked and have at least @free_space bytes free;
 * its lock/reference are consumed (transferred to the path, or released).
 * Returns 0 if the push worked and 1 if nothing could be pushed.
 */
static noinline int __push_leaf_right(struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
				      int free_space, u32 left_nritems,
				      u32 min_slot)
{
	struct btrfs_fs_info *fs_info = right->fs_info;
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
	struct btrfs_map_token token;
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;

	if (empty)
		nr = 0;
	else
		nr = max_t(u32, 1, min_slot);

	/* Reserve room in case our own slot ends up moving right. */
	if (path->slots[0] >= left_nritems)
		push_space += data_size;

	slot = path->slots[1];
	i = left_nritems - 1;
	/* Walk items from the end of left, deciding how many fit in right. */
	while (i >= nr) {
		item = btrfs_item_nr(i);

		if (!empty && push_items > 0) {
			if (path->slots[0] > i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(left);

				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
		if (i == 0)
			break;
		i--;
	}

	if (push_items == 0)
		goto out_unlock;

	WARN_ON(!empty && push_items == left_nritems);

	/* push left to right */
	right_nritems = btrfs_header_nritems(right);

	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
	push_space -= leaf_data_end(left);

	/* make room in the right data area */
	data_end = leaf_data_end(right);
	memmove_extent_buffer(right,
			      BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
			      BTRFS_LEAF_DATA_OFFSET + data_end,
			      BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);

	/* copy from the left data area */
	copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
		     BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
		     BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
		     push_space);

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
	btrfs_init_map_token(&token, right);
	right_nritems += push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(i);
		push_space -= btrfs_token_item_size(&token, item);
		btrfs_set_token_item_offset(&token, item, push_space);
	}

	left_nritems -= push_items;
	btrfs_set_header_nritems(left, left_nritems);

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	else
		btrfs_clean_tree_block(left);

	btrfs_mark_buffer_dirty(right);

	/* Right's first key changed; update the parent pointer. */
	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
	btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			btrfs_clean_tree_block(path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 0;

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
/*
 * Push some data from the path's leaf to its right sibling, trying to free
 * up at least @data_size bytes; returns zero if the push worked, nonzero
 * otherwise.
 *
 * Returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
 *
 * This will push starting from @min_slot to the end of the leaf.  It won't
 * push any slot lower than @min_slot.
 */
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
			   *root, struct btrfs_path *path,
			   int min_data_size, int data_size,
			   int empty, u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	int slot;
	int free_space;
	u32 left_nritems;
	int ret;

	if (!path->nodes[1])
		return 1;

	slot = path->slots[1];
	upper = path->nodes[1];
	/* No right sibling under this parent. */
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	right = btrfs_read_node_slot(upper, slot + 1);
	/*
	 * If we fail to read the right sibling, no big deal, just report
	 * that there was no room to push.
	 */
	if (IS_ERR(right))
		return 1;

	__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);

	free_space = btrfs_leaf_free_space(right);
	if (free_space < data_size)
		goto out_unlock;

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
	if (ret)
		goto out_unlock;

	free_space = btrfs_leaf_free_space(right);
	if (free_space < data_size)
		goto out_unlock;

	left_nritems = btrfs_header_nritems(left);
	if (left_nritems == 0)
		goto out_unlock;

	if (check_sibling_keys(left, right)) {
		ret = -EUCLEAN;
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
		return ret;
	}
	if (path->slots[0] == left_nritems && !empty) {
		/*
		 * Key greater than all keys in the leaf, right neighbor has
		 * enough room for it and we're not emptying our leaf to
		 * delete it, therefore use right neighbor to insert the new
		 * item and no need to touch/dirty our left leaf.
		 */
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->nodes[0] = right;
		path->slots[0] = 0;
		path->slots[1]++;
		return 0;
	}

	return __push_leaf_right(path, min_data_size, empty,
				 right, free_space, left_nritems, min_slot);
out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}
2910
2911
2912
2913
2914
2915
2916
2917
2918
/*
 * Push some data in the path's leaf (the right leaf here) to the left
 * sibling, trying to free up at least @data_size bytes.
 *
 * @max_slot limits how far into the right leaf we may push; the value
 * (u32)-1 means no limit.  @left must already be locked and COWed by the
 * caller, with @free_space bytes available.
 *
 * Returns 0 if the push worked (the path is fixed up to keep pointing at
 * the same item) and 1 if no items could be pushed.
 */
static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
				     int free_space, u32 right_nritems,
				     u32 max_slot)
{
	struct btrfs_fs_info *fs_info = left->fs_info;
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	int i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
	u32 nr;
	int ret = 0;
	u32 this_item_size;
	u32 old_left_item_size;
	struct btrfs_map_token token;

	/* Unless emptying the leaf, always keep at least one item behind. */
	if (empty)
		nr = min(right_nritems, max_slot);
	else
		nr = min(right_nritems - 1, max_slot);

	/*
	 * Count how many items (and how many bytes of item data) fit into
	 * the left leaf.  If the path points at slot i we also reserve
	 * @data_size so the pending insertion still fits afterwards.
	 */
	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(i);

		if (!empty && push_items > 0) {
			if (path->slots[0] < i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(right);

				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
	}

	if (push_items == 0) {
		ret = 1;
		goto out;
	}
	WARN_ON(!empty && push_items == btrfs_header_nritems(right));

	/* Push item headers from the right leaf to the left. */
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
		     btrfs_item_offset_nr(right, push_items - 1);

	/* Push the item data; leaf data grows from the end towards the front. */
	copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
		     leaf_data_end(left) - push_space,
		     BTRFS_LEAF_DATA_OFFSET +
		     btrfs_item_offset_nr(right, push_items - 1),
		     push_space);
	old_left_nritems = btrfs_header_nritems(left);
	BUG_ON(old_left_nritems <= 0);

	/* Rebase the offsets of the items just copied into the left leaf. */
	btrfs_init_map_token(&token, left);
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		u32 ioff;

		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
	}
	btrfs_set_header_nritems(left, old_left_nritems + push_items);

	/* Fixup the right leaf: move remaining items/data to the front. */
	if (push_items > right_nritems)
		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
		       right_nritems);

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(right);
		memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
				      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
				      BTRFS_LEAF_DATA_OFFSET +
				      leaf_data_end(right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
	}

	/* Recompute all item offsets in the right leaf from scratch. */
	btrfs_init_map_token(&token, right);
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(i);

		push_space = push_space - btrfs_token_item_size(&token, item);
		btrfs_set_token_item_offset(&token, item, push_space);
	}

	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
	else
		btrfs_clean_tree_block(right);

	/* The right leaf's first key changed; propagate it to the parents. */
	btrfs_item_key(right, &disk_key, 0);
	fixup_low_keys(path, &disk_key, 1);

	/* Then fixup the leaf pointer in the path. */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
		path->slots[1] -= 1;
	} else {
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->slots[0] -= push_items;
	}
	BUG_ON(path->slots[0] < 0);
	return ret;
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}
3062
3063
3064
3065
3066
3067
3068
3069
3070
/*
 * Push some data in the path's leaf to the left sibling, trying to free up
 * at least @data_size bytes.
 *
 * @max_slot limits how far into the leaf we'll push items; the special
 * value (u32)-1 means no limit.
 *
 * Returns 0 if the push worked, 1 if nothing could be pushed, or a
 * negative errno on error.
 */
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int min_data_size,
			  int data_size, int empty, u32 max_slot)
{
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
	int slot;
	int free_space;
	u32 right_nritems;
	int ret = 0;

	slot = path->slots[1];
	if (slot == 0)
		return 1;
	if (!path->nodes[1])
		return 1;

	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	left = btrfs_read_node_slot(path->nodes[1], slot - 1);
	/*
	 * A failure to read the left sibling is not fatal here; just tell
	 * the caller that nothing could be pushed.
	 */
	if (IS_ERR(left))
		return 1;

	__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);

	free_space = btrfs_leaf_free_space(left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	/* COW the left leaf, then re-check free space on the new copy. */
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left,
			      BTRFS_NESTING_LEFT_COW);
	if (ret) {
		/* -ENOSPC just means "could not push", not a hard error. */
		if (ret == -ENOSPC)
			ret = 1;
		goto out;
	}

	free_space = btrfs_leaf_free_space(left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	if (check_sibling_keys(left, right)) {
		ret = -EUCLEAN;
		goto out;
	}
	return __push_leaf_left(path, min_data_size,
			       empty, left, free_space, right_nritems,
			       max_slot);
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}
3139
3140
3141
3142
3143
/*
 * Copy items [mid, nritems) from leaf @l into the freshly allocated,
 * already locked leaf @right, insert a pointer to @right into the parent,
 * and fix up the path to point at whichever leaf now holds @slot.
 */
static noinline void copy_for_split(struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct extent_buffer *l,
				    struct extent_buffer *right,
				    int slot, int mid, int nritems)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int data_copy_size;
	int rt_data_off;
	int i;
	struct btrfs_disk_key disk_key;
	struct btrfs_map_token token;

	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);

	/* Copy the item headers, then the item data, to the new leaf. */
	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
		     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
		     data_copy_size, BTRFS_LEAF_DATA_OFFSET +
		     leaf_data_end(l), data_copy_size);

	/* Data moved towards the end of the leaf; rebase all item offsets. */
	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);

	btrfs_init_map_token(&token, right);
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(i);
		u32 ioff;

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
	}

	btrfs_set_header_nritems(l, mid);
	btrfs_item_key(right, &disk_key, 0);
	insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
	BUG_ON(path->slots[0] != slot);

	/* If the wanted slot moved to the new leaf, switch the path to it. */
	if (mid <= slot) {
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[0] -= mid;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}

	BUG_ON(path->slots[0] < 0);
}
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
/*
 * Double splits happen when we need to insert a big item in the middle of
 * a leaf.  They can leave us with several mostly-empty leaves, so before
 * resorting to one we try to push items into the adjacent leaves; if
 * either push makes progress (or makes the slot land on a leaf edge) the
 * double split may be avoided entirely.
 *
 * Returns 0 when the caller should retry a plain split, 1 when no
 * progress was made, or a negative errno.
 */
static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  int data_size)
{
	int ret;
	int progress = 0;
	int slot;
	u32 nritems;
	int space_needed = data_size;

	slot = path->slots[0];
	/* Inserting before the end does not reuse the leaf's free space. */
	if (slot < btrfs_header_nritems(path->nodes[0]))
		space_needed -= btrfs_leaf_free_space(path->nodes[0]);

	/*
	 * Try to push all the items after our slot into the right leaf.
	 */
	ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * Our goal is to get our slot at the start or end of a leaf.  If we
	 * ended up at an edge, a simple split will now suffice.
	 */
	if (path->slots[0] == 0 || path->slots[0] == nritems)
		return 0;

	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
		return 0;

	/* Try to push all the items before our slot into the left leaf. */
	slot = path->slots[0];
	space_needed = data_size;
	if (slot > 0)
		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
	ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	if (progress)
		return 0;
	return 1;
}
3266
3267
3268
3269
3270
3271
3272
/*
 * Split the path's leaf in two, making sure there is at least @data_size
 * available for the resulting leaf level of the path.
 *
 * Returns 0 if all went well and < 0 on failure.
 */
static noinline int split_leaf(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       const struct btrfs_key *ins_key,
			       struct btrfs_path *path, int data_size,
			       int extend)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *l;
	u32 nritems;
	int mid;
	int slot;
	struct extent_buffer *right;
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret = 0;
	int wret;
	int split;
	int num_doubles = 0;
	int tried_avoid_double = 0;

	l = path->nodes[0];
	slot = path->slots[0];
	/* An extended item can never exceed the leaf data area. */
	if (extend && data_size + btrfs_item_size_nr(l, slot) +
	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
		return -EOVERFLOW;

	/* First try to make some room by pushing left and right. */
	if (data_size && path->nodes[1]) {
		int space_needed = data_size;

		if (slot < btrfs_header_nritems(l))
			space_needed -= btrfs_leaf_free_space(l);

		wret = push_leaf_right(trans, root, path, space_needed,
				       space_needed, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
			space_needed = data_size;
			if (slot > 0)
				space_needed -= btrfs_leaf_free_space(l);
			wret = push_leaf_left(trans, root, path, space_needed,
					      space_needed, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];

		/* Did the pushing free enough room already? */
		if (btrfs_leaf_free_space(l) >= data_size)
			return 0;
	}

	if (!path->nodes[1]) {
		ret = insert_new_root(trans, root, path, 1);
		if (ret)
			return ret;
	}
again:
	/*
	 * Pick the split point.  split == 1 splits at mid, split == 0 means
	 * the new leaf stays empty (the new item goes there by itself),
	 * split == 2 means a second split round will be needed.
	 */
	split = 1;
	l = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(l);
	mid = (nritems + 1) / 2;

	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(fs_info)) {
			if (slot >= nritems) {
				split = 0;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	} else {
		if (leaf_space_used(l, 0, mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(fs_info)) {
			if (!extend && data_size && slot == 0) {
				split = 0;
			} else if ((extend || !data_size) && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	}

	if (split == 0)
		btrfs_cpu_key_to_disk(&disk_key, ins_key);
	else
		btrfs_item_key(l, &disk_key, mid);

	/*
	 * On the second split round a different lockdep nesting class is
	 * used for the new leaf (NEW_ROOT instead of SPLIT), presumably to
	 * avoid exhausting lockdep subclasses -- NOTE(review): confirm
	 * against locking documentation.
	 */
	right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
					     l->start, 0, num_doubles ?
					     BTRFS_NESTING_NEW_ROOT :
					     BTRFS_NESTING_SPLIT);
	if (IS_ERR(right))
		return PTR_ERR(right);

	root_add_used(root, fs_info->nodesize);

	if (split == 0) {
		/* The new leaf stays empty; just link it into the parent. */
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
			insert_ptr(trans, path, &disk_key,
				   right->start, path->slots[1] + 1, 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			path->slots[1] += 1;
		} else {
			btrfs_set_header_nritems(right, 0);
			insert_ptr(trans, path, &disk_key,
				   right->start, path->slots[1], 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			if (path->slots[1] == 0)
				fixup_low_keys(path, &disk_key, 1);
		}
		/*
		 * The new leaf is not dirtied here; the caller will fill it
		 * and mark it dirty after copying the item in.
		 */
		return ret;
	}

	copy_for_split(trans, path, l, right, slot, mid, nritems);

	if (split == 2) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}

	return 0;

push_for_double:
	push_for_double_split(trans, root, path, data_size);
	tried_avoid_double = 1;
	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
		return 0;
	goto again;
}
3442
/*
 * Make sure the leaf the path points at has room for @ins_len more bytes,
 * splitting it if necessary.  Only used for EXTENT_DATA and EXTENT_CSUM
 * items (enforced by the BUG_ON below).
 *
 * The path is released and re-searched with keep_locks set; -EAGAIN is
 * returned when the item changed (size or extent length) in the meantime,
 * or when the re-searched leaf already has enough free space, so the
 * caller must restart its operation.  Returns 0 on success.
 */
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct btrfs_path *path, int ins_len)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	u64 extent_len = 0;
	u32 item_size;
	int ret;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
	       key.type != BTRFS_EXTENT_CSUM_KEY);

	if (btrfs_leaf_free_space(leaf) >= ins_len)
		return 0;

	/* Remember size/extent length so we can detect concurrent changes. */
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
	}
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->search_for_split = 1;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	path->search_for_split = 0;
	if (ret > 0)
		ret = -EAGAIN;
	if (ret < 0)
		goto err;

	ret = -EAGAIN;
	leaf = path->nodes[0];
	/* If the item changed while we dropped the path, let the caller retry. */
	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
		goto err;

	/* The leaf has free space now; the caller can proceed without a split. */
	if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
		goto err;

	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
			goto err;
	}

	ret = split_leaf(trans, root, &key, path, ins_len, 1);
	if (ret)
		goto err;

	path->keep_locks = 0;
	btrfs_unlock_up_safe(path, 1);
	return 0;
err:
	path->keep_locks = 0;
	return ret;
}
3508
/*
 * Split the item at path->slots[0] in place: the original item keeps the
 * first @split_offset bytes, a new item with @new_key is inserted at the
 * next slot holding the remaining bytes.  The leaf must have room for one
 * more item header (asserted below).
 *
 * Returns 0 on success or -ENOMEM.
 */
static noinline int split_item(struct btrfs_path *path,
			       const struct btrfs_key *new_key,
			       unsigned long split_offset)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	struct btrfs_item *new_item;
	int slot;
	char *buf;
	u32 nritems;
	u32 item_size;
	u32 orig_offset;
	struct btrfs_disk_key disk_key;

	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));

	item = btrfs_item_nr(path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);

	/* Stash the full item data so it can be rewritten as two pieces. */
	buf = kmalloc(item_size, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			    path->slots[0]), item_size);

	slot = path->slots[0] + 1;
	nritems = btrfs_header_nritems(leaf);
	if (slot != nritems) {
		/* Shift the item headers after our slot to make room. */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
				btrfs_item_nr_offset(slot),
				(nritems - slot) * sizeof(struct btrfs_item));
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(leaf, &disk_key, slot);

	new_item = btrfs_item_nr(slot);

	/* The new item takes the tail; the original shrinks to the head. */
	btrfs_set_item_offset(leaf, new_item, orig_offset);
	btrfs_set_item_size(leaf, new_item, item_size - split_offset);

	btrfs_set_item_offset(leaf, item,
			      orig_offset + item_size - split_offset);
	btrfs_set_item_size(leaf, item, split_offset);

	btrfs_set_header_nritems(leaf, nritems + 1);

	/* Write the data for the start of the original item. */
	write_extent_buffer(leaf, buf,
			    btrfs_item_ptr_offset(leaf, path->slots[0]),
			    split_offset);

	/* Write the data for the new item. */
	write_extent_buffer(leaf, buf + split_offset,
			    btrfs_item_ptr_offset(leaf, slot),
			    item_size - split_offset);
	btrfs_mark_buffer_dirty(leaf);

	BUG_ON(btrfs_leaf_free_space(leaf) < 0);
	kfree(buf);
	return 0;
}
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591int btrfs_split_item(struct btrfs_trans_handle *trans,
3592 struct btrfs_root *root,
3593 struct btrfs_path *path,
3594 const struct btrfs_key *new_key,
3595 unsigned long split_offset)
3596{
3597 int ret;
3598 ret = setup_leaf_for_split(trans, root, path,
3599 sizeof(struct btrfs_item));
3600 if (ret)
3601 return ret;
3602
3603 ret = split_item(path, new_key, split_offset);
3604 return ret;
3605}
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3616 struct btrfs_root *root,
3617 struct btrfs_path *path,
3618 const struct btrfs_key *new_key)
3619{
3620 struct extent_buffer *leaf;
3621 int ret;
3622 u32 item_size;
3623
3624 leaf = path->nodes[0];
3625 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3626 ret = setup_leaf_for_split(trans, root, path,
3627 item_size + sizeof(struct btrfs_item));
3628 if (ret)
3629 return ret;
3630
3631 path->slots[0]++;
3632 setup_items_for_insert(root, path, new_key, &item_size, 1);
3633 leaf = path->nodes[0];
3634 memcpy_extent_buffer(leaf,
3635 btrfs_item_ptr_offset(leaf, path->slots[0]),
3636 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3637 item_size);
3638 return 0;
3639}
3640
3641
3642
3643
3644
3645
3646
/*
 * Make the item pointed to by the path smaller.  @new_size is how many
 * bytes of the item's data remain; @from_end selects whether bytes are
 * dropped from the end of the data (true) or from the front (false, in
 * which case the item's key offset is bumped by the removed amount).
 */
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;
	struct btrfs_map_token token;

	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * Item data grows from the end of the leaf towards the front, so
	 * shrinking an item moves the offsets of it and every later item
	 * up by size_diff.
	 */
	btrfs_init_map_token(&token, leaf);
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff + size_diff);
	}

	/* Shift the data. */
	if (from_end) {
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			/*
			 * For inline extents, keep the fixed-size header in
			 * place and only drop inline data bytes.
			 */
			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				      (unsigned long)fi,
				      BTRFS_FILE_EXTENT_INLINE_DATA_START);
			}
		}

		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data_start - data_end);

		/* Truncating from the front shifts the key offset forward. */
		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(path, &disk_key, 1);
	}

	item = btrfs_item_nr(slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(leaf) < 0) {
		btrfs_print_leaf(leaf);
		BUG();
	}
}
3739
3740
3741
3742
/*
 * Make the item pointed to by the path bigger by @data_size bytes; the
 * new space is added at the end of the item's data.  The leaf must
 * already have enough free space (checked below with BUG()).
 */
void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;
	struct btrfs_map_token token;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	if (btrfs_leaf_free_space(leaf) < data_size) {
		btrfs_print_leaf(leaf);
		BUG();
	}
	slot = path->slots[0];
	old_data = btrfs_item_end_nr(leaf, slot);

	BUG_ON(slot < 0);
	if (slot >= nritems) {
		btrfs_print_leaf(leaf);
		btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
			   slot, nritems);
		BUG();
	}

	/*
	 * Item data grows from the end of the leaf towards the front, so
	 * growing this item moves the offsets of it and every later item
	 * down by data_size.
	 */
	btrfs_init_map_token(&token, leaf);
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff - data_size);
	}

	/* Shift the data. */
	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
		      data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
		      data_end, old_data - data_end);

	data_end = old_data;
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(leaf) < 0) {
		btrfs_print_leaf(leaf);
		BUG();
	}
}
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
/*
 * setup_items_for_insert - Helper called before inserting one or more items
 * into a leaf.  It does the bulk of the leaf surgery without calling
 * btrfs_search_slot; the path must already point at the insertion slot and
 * the leaf must have room for @nr items (checked below with BUG()).
 *
 * @root:	root we are inserting items into
 * @path:	points to the leaf/slot where the new items go
 * @cpu_key:	array of @nr keys for the items to be inserted
 * @data_size:	array of @nr data sizes, one per item
 * @nr:		number of items to insert
 */
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
			    const struct btrfs_key *cpu_key, u32 *data_size,
			    int nr)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_item *item;
	int i;
	u32 nritems;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	struct extent_buffer *leaf;
	int slot;
	struct btrfs_map_token token;
	u32 total_size;
	u32 total_data = 0;

	for (i = 0; i < nr; i++)
		total_data += data_size[i];
	total_size = total_data + (nr * sizeof(struct btrfs_item));

	/* Inserting at slot 0 changes the leaf's lowest key. */
	if (path->slots[0] == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		fixup_low_keys(path, &disk_key, 1);
	}
	btrfs_unlock_up_safe(path, 1);

	leaf = path->nodes[0];
	slot = path->slots[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	if (btrfs_leaf_free_space(leaf) < total_size) {
		btrfs_print_leaf(leaf);
		btrfs_crit(fs_info, "not enough freespace need %u have %d",
			   total_size, btrfs_leaf_free_space(leaf));
		BUG();
	}

	btrfs_init_map_token(&token, leaf);
	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		if (old_data < data_end) {
			btrfs_print_leaf(leaf);
			btrfs_crit(fs_info,
		"item at slot %d with data offset %u beyond data end of leaf %u",
				   slot, old_data, data_end);
			BUG();
		}
		/*
		 * Item data grows from the end of the leaf towards the
		 * front, so every item after the slot moves down by
		 * total_data.
		 */
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(i);
			ioff = btrfs_token_item_offset(&token, item);
			btrfs_set_token_item_offset(&token, item,
						    ioff - total_data);
		}
		/* Shift the item headers to make room for the new ones. */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* Shift the data. */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data - data_end);
		data_end = old_data;
	}

	/* Set up the item headers and keys for the new (still empty) items. */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(slot + i);
		data_end -= data_size[i];
		btrfs_set_token_item_offset(&token, item, data_end);
		btrfs_set_token_item_size(&token, item, data_size[i]);
	}

	btrfs_set_header_nritems(leaf, nritems + nr);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(leaf) < 0) {
		btrfs_print_leaf(leaf);
		BUG();
	}
}
3908
3909
3910
3911
3912
3913int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3914 struct btrfs_root *root,
3915 struct btrfs_path *path,
3916 const struct btrfs_key *cpu_key, u32 *data_size,
3917 int nr)
3918{
3919 int ret = 0;
3920 int slot;
3921 int i;
3922 u32 total_size = 0;
3923 u32 total_data = 0;
3924
3925 for (i = 0; i < nr; i++)
3926 total_data += data_size[i];
3927
3928 total_size = total_data + (nr * sizeof(struct btrfs_item));
3929 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3930 if (ret == 0)
3931 return -EEXIST;
3932 if (ret < 0)
3933 return ret;
3934
3935 slot = path->slots[0];
3936 BUG_ON(slot < 0);
3937
3938 setup_items_for_insert(root, path, cpu_key, data_size, nr);
3939 return 0;
3940}
3941
3942
3943
3944
3945
3946int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3947 const struct btrfs_key *cpu_key, void *data,
3948 u32 data_size)
3949{
3950 int ret = 0;
3951 struct btrfs_path *path;
3952 struct extent_buffer *leaf;
3953 unsigned long ptr;
3954
3955 path = btrfs_alloc_path();
3956 if (!path)
3957 return -ENOMEM;
3958 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3959 if (!ret) {
3960 leaf = path->nodes[0];
3961 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3962 write_extent_buffer(leaf, data, ptr, data_size);
3963 btrfs_mark_buffer_dirty(leaf);
3964 }
3965 btrfs_free_path(path);
3966 return ret;
3967}
3968
3969
3970
3971
3972
3973
3974
/*
 * Delete the pointer at @slot from the node at @level of the path.
 *
 * The tree should have been previously balanced so the deletion does not
 * empty a node (other than the root, which is handled below).
 */
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
		    int level, int slot)
{
	struct extent_buffer *parent = path->nodes[level];
	u32 nritems;
	int ret;

	nritems = btrfs_header_nritems(parent);
	if (slot != nritems - 1) {
		/* Record the move in the tree mod log before doing it. */
		if (level) {
			ret = btrfs_tree_mod_log_insert_move(parent, slot,
					slot + 1, nritems - slot - 1);
			BUG_ON(ret < 0);
		}
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
	} else if (level) {
		ret = btrfs_tree_mod_log_insert_key(parent, slot,
				BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
		BUG_ON(ret < 0);
	}

	nritems--;
	btrfs_set_header_nritems(parent, nritems);
	if (nritems == 0 && parent == root->node) {
		BUG_ON(btrfs_header_level(root->node) != 1);
		/* Just turn the now-empty root into a leaf. */
		btrfs_set_header_level(root->node, 0);
	} else if (slot == 0) {
		struct btrfs_disk_key disk_key;

		/* Deleting the lowest key changes the keys above us. */
		btrfs_node_key(parent, &disk_key, 0);
		fixup_low_keys(path, &disk_key, level + 1);
	}
	btrfs_mark_buffer_dirty(parent);
}
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
/*
 * Delete the leaf pointed to by path->slots[1] in path->nodes[1]: remove
 * its pointer from the parent and free the leaf's extent.
 *
 * The path must already be set up for deleting the leaf, including all
 * necessary balancing.
 */
static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct extent_buffer *leaf)
{
	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
	del_ptr(root, path, 1, path->slots[1]);

	/*
	 * The leaf is gone from the tree, so it's safe to drop the locks
	 * above it before freeing the extent.
	 */
	btrfs_unlock_up_safe(path, 0);

	root_sub_used(root, leaf->len);

	/*
	 * Hold an extra reference across btrfs_free_tree_block() and then
	 * drop it with the buffer marked stale.
	 */
	atomic_inc(&leaf->refs);
	btrfs_free_tree_block(trans, root, leaf, 0, 1);
	free_extent_buffer_stale(leaf);
}
4045
4046
4047
4048
/*
 * Delete @nr items starting at @slot from the leaf the path points at.
 * If that empties the leaf, or leaves it mostly empty, the leaf is freed
 * or rebalanced into its siblings.
 *
 * Returns 0 on success or a negative errno from the rebalancing pushes.
 */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 last_off;
	u32 dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	if (slot + nr != nritems) {
		int data_end = leaf_data_end(leaf);
		struct btrfs_map_token token;

		/* Close the gap in the data area ... */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + dsize,
			      BTRFS_LEAF_DATA_OFFSET + data_end,
			      last_off - data_end);

		/* ... rebase the offsets of the surviving items ... */
		btrfs_init_map_token(&token, leaf);
		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(i);
			ioff = btrfs_token_item_offset(&token, item);
			btrfs_set_token_item_offset(&token, item, ioff + dsize);
		}

		/* ... and close the gap in the item header array. */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* Delete the leaf if we've emptied it. */
	if (nritems == 0) {
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_clean_tree_block(leaf);
			btrfs_del_leaf(trans, root, path, leaf);
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
			fixup_low_keys(path, &disk_key, 1);
		}

		/* Try to merge the leaf into its neighbors if it's mostly empty. */
		if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
			/*
			 * push_leaf_left/right may change the path, so keep
			 * an extra reference on the leaf and remember our
			 * parent slot for a possible btrfs_del_leaf() below.
			 */
			slot = path->slots[1];
			atomic_inc(&leaf->refs);

			wret = push_leaf_left(trans, root, path, 1, 1,
					      1, (u32)-1);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1,
						       1, 1, 0);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				path->slots[1] = slot;
				btrfs_del_leaf(trans, root, path, leaf);
				free_extent_buffer(leaf);
				ret = 0;
			} else {
				/*
				 * The pushes moved the path away from our
				 * leaf; only dirty it if the path still
				 * points at it (the sibling the items moved
				 * to was dirtied by the push itself).
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4166{
4167 struct btrfs_key key;
4168 struct btrfs_disk_key found_key;
4169 int ret;
4170
4171 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4172
4173 if (key.offset > 0) {
4174 key.offset--;
4175 } else if (key.type > 0) {
4176 key.type--;
4177 key.offset = (u64)-1;
4178 } else if (key.objectid > 0) {
4179 key.objectid--;
4180 key.type = (u8)-1;
4181 key.offset = (u64)-1;
4182 } else {
4183 return 1;
4184 }
4185
4186 btrfs_release_path(path);
4187 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4188 if (ret < 0)
4189 return ret;
4190 btrfs_item_key(path->nodes[0], &found_key, 0);
4191 ret = comp_keys(&found_key, &key);
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202 if (ret <= 0)
4203 return 0;
4204 return 1;
4205}
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
/*
 * Walk down the tree starting at @min_key, looking for nodes or leaves whose
 * generation is at least @min_trans.  Used by the btree defrag code and tree
 * logging.
 *
 * This does not COW, but it does stuff the starting key it finds back into
 * @min_key, so the caller can call btrfs_search_slot() with cow=1 on that key
 * and get a writable path.
 *
 * This honors path->lowest_level to prevent descent below a certain level of
 * the tree.
 *
 * Returns 0 if something useful was found, 1 if nothing in the tree matched
 * the search criteria, and < 0 on error.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_path *path,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;
	int keep_locks = path->keep_locks;

	/* Keep every level locked while we walk so we can back up safely. */
	path->keep_locks = 1;
again:
	cur = btrfs_read_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = BTRFS_READ_LOCK;

	/* The whole tree is older than min_trans; nothing can match. */
	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = btrfs_bin_search(cur, min_key, &slot);
		if (sret < 0) {
			ret = sret;
			goto out;
		}

		/* At the lowest desired level, return whatever slot we hit. */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		/* Inexact match: step back to the covering node pointer. */
		if (sret && slot > 0)
			slot--;
		/*
		 * Check this node pointer against min_trans; skip forward past
		 * pointers that are too old (without reading their blocks).
		 */
		while (slot < nritems) {
			u64 gen;

			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			break;
		}
find_next_key:
		/*
		 * We didn't find a candidate key in this node, search forward
		 * in the tree for one and restart the descent from the root.
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			sret = btrfs_find_next_key(root, path, min_key, level,
						  min_trans);
			if (sret == 0) {
				btrfs_release_path(path);
				goto again;
			} else {
				goto out;
			}
		}
		/* Save our key for returning back to the caller. */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			goto out;
		}
		cur = btrfs_read_node_slot(cur, slot);
		if (IS_ERR(cur)) {
			ret = PTR_ERR(cur);
			goto out;
		}

		btrfs_tree_read_lock(cur);

		/* Hand-over-hand: record the child, drop unneeded parents. */
		path->locks[level - 1] = BTRFS_READ_LOCK;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1, 0, NULL);
	}
out:
	path->keep_locks = keep_locks;
	if (ret == 0) {
		btrfs_unlock_up_safe(path, path->lowest_level + 1);
		memcpy(min_key, &found_key, sizeof(found_key));
	}
	return ret;
}
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
/*
 * This is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the min_trans parameter.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree.
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int level, u64 min_trans)
{
	int slot;
	struct extent_buffer *c;

	WARN_ON(!path->keep_locks && !path->skip_locking);
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		/* Try the slot right after our current position. */
		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			int ret;
			int orig_lowest;
			struct btrfs_key cur_key;
			if (level + 1 >= BTRFS_MAX_LEVEL ||
			    !path->nodes[level + 1])
				return 1;

			/* Parent is locked (or locking is off): climb up. */
			if (path->locks[level + 1] || path->skip_locking) {
				level++;
				continue;
			}
			/*
			 * Parent is not locked, so we cannot safely walk up.
			 * Re-search from the last key of this node to get a
			 * fully populated, locked path down to this level.
			 */
			slot = btrfs_header_nritems(c) - 1;
			if (level == 0)
				btrfs_item_key_to_cpu(c, &cur_key, slot);
			else
				btrfs_node_key_to_cpu(c, &cur_key, slot);

			orig_lowest = path->lowest_level;
			btrfs_release_path(path);
			path->lowest_level = level;
			ret = btrfs_search_slot(NULL, root, &cur_key, path,
						0, 0);
			path->lowest_level = orig_lowest;
			if (ret < 0)
				return ret;

			c = path->nodes[level];
			slot = path->slots[level];
			/* Exact match found: the next key is one slot over. */
			if (ret == 0)
				slot++;
			goto next;
		}

		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 gen = btrfs_node_ptr_generation(c, slot);

			/* Skip node pointers older than min_trans. */
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}
4404
4405
4406
4407
4408
4409
/*
 * Search the tree again to find a leaf with greater keys.
 *
 * Returns 0 if it found something, 1 if there are no greater leaves,
 * and < 0 on error.
 */
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	return btrfs_next_old_leaf(root, path, 0);
}
4414
/*
 * Find the next leaf after the current one in the path.  If @time_seq is
 * non-zero, the search is done against the tree-mod-log snapshot at that
 * sequence number instead of the live tree.
 *
 * Returns 0 if a next leaf was found, 1 if there are no greater leaves,
 * and < 0 on error.
 */
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
			u64 time_seq)
{
	int slot;
	int level;
	struct extent_buffer *c;
	struct extent_buffer *next;
	struct btrfs_key key;
	u32 nritems;
	int ret;
	int i;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	/* Remember the last key of the current leaf as our search anchor. */
	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
	level = 1;
	next = NULL;
	btrfs_release_path(path);

	path->keep_locks = 1;

	if (time_seq)
		ret = btrfs_search_old_slot(root, &key, path, time_seq);
	else
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * By releasing the path above we dropped all our locks.  A balance
	 * could have added more items next to the key that used to be at the
	 * very end of the block.  So, check again here and advance the path
	 * if there are now more items available.
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		if (ret == 0)
			path->slots[0]++;
		ret = 0;
		goto done;
	}
	/*
	 * The check above misses one case:
	 * - after releasing the path above, someone has removed the item that
	 *   used to be at the very end of the block, and balance between
	 *   leaves got another one with a bigger key.offset to replace it.
	 *
	 * That one should be returned as well, or we can get leaf corruption
	 * later (esp. in __btrfs_drop_extents()).
	 *
	 * With ret > 0, the key wasn't found and the path points to the slot
	 * where it would be inserted, so the item at path->slots[0] must be
	 * the bigger one.
	 */
	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
		ret = 0;
		goto done;
	}

	/* Walk up the path until we find a node with a slot to our right. */
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level]) {
			ret = 1;
			goto done;
		}

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL) {
				ret = 1;
				goto done;
			}
			continue;
		}

		/*
		 * Drop the locks and nodes from level 0 up to (not including)
		 * our current level before reading the next block, so lock
		 * ordering stays top-down.
		 *
		 * NOTE(review): the condition tests locks[level], not
		 * locks[i].  Because keep_locks was set during the search
		 * above, either all of these levels are read-locked or (with
		 * skip_locking) none are, so this acts as a "locking enabled"
		 * check — confirm before changing it to locks[i].
		 */
		for (i = 0; i < level; i++) {
			if (path->locks[level]) {
				btrfs_tree_read_unlock(path->nodes[i]);
				path->locks[i] = 0;
			}
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}

		next = c;
		ret = read_block_for_search(root, path, &next, level,
					    slot, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret && time_seq) {
				/*
				 * If we don't get the lock, we may be racing
				 * with push_leaf_left, holding that lock while
				 * itself waiting for the leaf we've currently
				 * locked. To solve this situation, we give up
				 * on our lock and cycle through the whole
				 * search again.
				 */
				free_extent_buffer(next);
				btrfs_release_path(path);
				cond_resched();
				goto again;
			}
			if (!ret)
				btrfs_tree_read_lock(next);
		}
		break;
	}
	path->slots[level] = slot;
	/* Walk back down the leftmost spine to reach the next leaf. */
	while (1) {
		level--;
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = BTRFS_READ_LOCK;
		if (!level)
			break;

		ret = read_block_for_search(root, path, &next, level,
					    0, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking)
			btrfs_tree_read_lock(next);
	}
	ret = 0;
done:
	unlock_up(path, 0, 1, 0, NULL);

	return ret;
}
4572
4573
4574
4575
4576
4577
4578
4579int btrfs_previous_item(struct btrfs_root *root,
4580 struct btrfs_path *path, u64 min_objectid,
4581 int type)
4582{
4583 struct btrfs_key found_key;
4584 struct extent_buffer *leaf;
4585 u32 nritems;
4586 int ret;
4587
4588 while (1) {
4589 if (path->slots[0] == 0) {
4590 ret = btrfs_prev_leaf(root, path);
4591 if (ret != 0)
4592 return ret;
4593 } else {
4594 path->slots[0]--;
4595 }
4596 leaf = path->nodes[0];
4597 nritems = btrfs_header_nritems(leaf);
4598 if (nritems == 0)
4599 return 1;
4600 if (path->slots[0] == nritems)
4601 path->slots[0]--;
4602
4603 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4604 if (found_key.objectid < min_objectid)
4605 break;
4606 if (found_key.type == type)
4607 return 0;
4608 if (found_key.objectid == min_objectid &&
4609 found_key.type < type)
4610 break;
4611 }
4612 return 1;
4613}
4614
4615
4616
4617
4618
4619
4620
4621int btrfs_previous_extent_item(struct btrfs_root *root,
4622 struct btrfs_path *path, u64 min_objectid)
4623{
4624 struct btrfs_key found_key;
4625 struct extent_buffer *leaf;
4626 u32 nritems;
4627 int ret;
4628
4629 while (1) {
4630 if (path->slots[0] == 0) {
4631 ret = btrfs_prev_leaf(root, path);
4632 if (ret != 0)
4633 return ret;
4634 } else {
4635 path->slots[0]--;
4636 }
4637 leaf = path->nodes[0];
4638 nritems = btrfs_header_nritems(leaf);
4639 if (nritems == 0)
4640 return 1;
4641 if (path->slots[0] == nritems)
4642 path->slots[0]--;
4643
4644 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4645 if (found_key.objectid < min_objectid)
4646 break;
4647 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4648 found_key.type == BTRFS_METADATA_ITEM_KEY)
4649 return 0;
4650 if (found_key.objectid == min_objectid &&
4651 found_key.type < BTRFS_EXTENT_ITEM_KEY)
4652 break;
4653 }
4654 return 1;
4655}
4656