1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/fs.h>
20#include <linux/pagemap.h>
21#include <linux/highmem.h>
22#include <linux/time.h>
23#include <linux/init.h>
24#include <linux/string.h>
25#include <linux/backing-dev.h>
26#include <linux/mpage.h>
27#include <linux/falloc.h>
28#include <linux/swap.h>
29#include <linux/writeback.h>
30#include <linux/statfs.h>
31#include <linux/compat.h>
32#include <linux/slab.h>
33#include <linux/btrfs.h>
34#include <linux/uio.h>
35#include "ctree.h"
36#include "disk-io.h"
37#include "transaction.h"
38#include "btrfs_inode.h"
39#include "print-tree.h"
40#include "tree-log.h"
41#include "locking.h"
42#include "volumes.h"
43#include "qgroup.h"
44#include "compression.h"
45
/* Slab cache from which the struct inode_defrag records below are allocated. */
static struct kmem_cache *btrfs_inode_defrag_cachep;
47
48
49
50
51
/*
 * One queued auto-defrag request.  Records are kept in the rb-tree
 * fs_info->defrag_inodes, ordered by (root, ino), and remember which inodes
 * still need a defrag pass.
 */
struct inode_defrag {
	/* linkage into fs_info->defrag_inodes */
	struct rb_node rb_node;

	/* inode number of the file to defrag, as returned by btrfs_ino() */
	u64 ino;

	/*
	 * transaction id recorded when the request was queued; it is handed
	 * to btrfs_defrag_file() when the request is run
	 */
	u64 transid;

	/* objectid of the root the inode belongs to */
	u64 root;

	/* file offset at which the next defrag pass starts */
	u64 last_offset;

	/* set to 1 once the scan has been restarted from offset 0 */
	int cycled;
};
71
72static int __compare_inode_defrag(struct inode_defrag *defrag1,
73 struct inode_defrag *defrag2)
74{
75 if (defrag1->root > defrag2->root)
76 return 1;
77 else if (defrag1->root < defrag2->root)
78 return -1;
79 else if (defrag1->ino > defrag2->ino)
80 return 1;
81 else if (defrag1->ino < defrag2->ino)
82 return -1;
83 else
84 return 0;
85}
86
87
88
89
90
91
92
93
94
95
96static int __btrfs_add_inode_defrag(struct inode *inode,
97 struct inode_defrag *defrag)
98{
99 struct btrfs_root *root = BTRFS_I(inode)->root;
100 struct inode_defrag *entry;
101 struct rb_node **p;
102 struct rb_node *parent = NULL;
103 int ret;
104
105 p = &root->fs_info->defrag_inodes.rb_node;
106 while (*p) {
107 parent = *p;
108 entry = rb_entry(parent, struct inode_defrag, rb_node);
109
110 ret = __compare_inode_defrag(defrag, entry);
111 if (ret < 0)
112 p = &parent->rb_left;
113 else if (ret > 0)
114 p = &parent->rb_right;
115 else {
116
117
118
119
120 if (defrag->transid < entry->transid)
121 entry->transid = defrag->transid;
122 if (defrag->last_offset > entry->last_offset)
123 entry->last_offset = defrag->last_offset;
124 return -EEXIST;
125 }
126 }
127 set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
128 rb_link_node(&defrag->rb_node, parent, p);
129 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
130 return 0;
131}
132
133static inline int __need_auto_defrag(struct btrfs_root *root)
134{
135 if (!btrfs_test_opt(root->fs_info, AUTO_DEFRAG))
136 return 0;
137
138 if (btrfs_fs_closing(root->fs_info))
139 return 0;
140
141 return 1;
142}
143
144
145
146
147
148int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
149 struct inode *inode)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 struct inode_defrag *defrag;
153 u64 transid;
154 int ret;
155
156 if (!__need_auto_defrag(root))
157 return 0;
158
159 if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
160 return 0;
161
162 if (trans)
163 transid = trans->transid;
164 else
165 transid = BTRFS_I(inode)->root->last_trans;
166
167 defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
168 if (!defrag)
169 return -ENOMEM;
170
171 defrag->ino = btrfs_ino(inode);
172 defrag->transid = transid;
173 defrag->root = root->root_key.objectid;
174
175 spin_lock(&root->fs_info->defrag_inodes_lock);
176 if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
177
178
179
180
181
182 ret = __btrfs_add_inode_defrag(inode, defrag);
183 if (ret)
184 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
185 } else {
186 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
187 }
188 spin_unlock(&root->fs_info->defrag_inodes_lock);
189 return 0;
190}
191
192
193
194
195
196
197static void btrfs_requeue_inode_defrag(struct inode *inode,
198 struct inode_defrag *defrag)
199{
200 struct btrfs_root *root = BTRFS_I(inode)->root;
201 int ret;
202
203 if (!__need_auto_defrag(root))
204 goto out;
205
206
207
208
209
210 spin_lock(&root->fs_info->defrag_inodes_lock);
211 ret = __btrfs_add_inode_defrag(inode, defrag);
212 spin_unlock(&root->fs_info->defrag_inodes_lock);
213 if (ret)
214 goto out;
215 return;
216out:
217 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
218}
219
220
221
222
223
224static struct inode_defrag *
225btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
226{
227 struct inode_defrag *entry = NULL;
228 struct inode_defrag tmp;
229 struct rb_node *p;
230 struct rb_node *parent = NULL;
231 int ret;
232
233 tmp.ino = ino;
234 tmp.root = root;
235
236 spin_lock(&fs_info->defrag_inodes_lock);
237 p = fs_info->defrag_inodes.rb_node;
238 while (p) {
239 parent = p;
240 entry = rb_entry(parent, struct inode_defrag, rb_node);
241
242 ret = __compare_inode_defrag(&tmp, entry);
243 if (ret < 0)
244 p = parent->rb_left;
245 else if (ret > 0)
246 p = parent->rb_right;
247 else
248 goto out;
249 }
250
251 if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
252 parent = rb_next(parent);
253 if (parent)
254 entry = rb_entry(parent, struct inode_defrag, rb_node);
255 else
256 entry = NULL;
257 }
258out:
259 if (entry)
260 rb_erase(parent, &fs_info->defrag_inodes);
261 spin_unlock(&fs_info->defrag_inodes_lock);
262 return entry;
263}
264
265void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
266{
267 struct inode_defrag *defrag;
268 struct rb_node *node;
269
270 spin_lock(&fs_info->defrag_inodes_lock);
271 node = rb_first(&fs_info->defrag_inodes);
272 while (node) {
273 rb_erase(node, &fs_info->defrag_inodes);
274 defrag = rb_entry(node, struct inode_defrag, rb_node);
275 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
276
277 cond_resched_lock(&fs_info->defrag_inodes_lock);
278
279 node = rb_first(&fs_info->defrag_inodes);
280 }
281 spin_unlock(&fs_info->defrag_inodes_lock);
282}
283
/*
 * Batch limit passed as the last argument of btrfs_defrag_file() on each
 * auto-defrag pass; a return value equal to it causes the inode to be
 * requeued.
 */
#define BTRFS_DEFRAG_BATCH 1024
285
286static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
287 struct inode_defrag *defrag)
288{
289 struct btrfs_root *inode_root;
290 struct inode *inode;
291 struct btrfs_key key;
292 struct btrfs_ioctl_defrag_range_args range;
293 int num_defrag;
294 int index;
295 int ret;
296
297
298 key.objectid = defrag->root;
299 key.type = BTRFS_ROOT_ITEM_KEY;
300 key.offset = (u64)-1;
301
302 index = srcu_read_lock(&fs_info->subvol_srcu);
303
304 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
305 if (IS_ERR(inode_root)) {
306 ret = PTR_ERR(inode_root);
307 goto cleanup;
308 }
309
310 key.objectid = defrag->ino;
311 key.type = BTRFS_INODE_ITEM_KEY;
312 key.offset = 0;
313 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
314 if (IS_ERR(inode)) {
315 ret = PTR_ERR(inode);
316 goto cleanup;
317 }
318 srcu_read_unlock(&fs_info->subvol_srcu, index);
319
320
321 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
322 memset(&range, 0, sizeof(range));
323 range.len = (u64)-1;
324 range.start = defrag->last_offset;
325
326 sb_start_write(fs_info->sb);
327 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
328 BTRFS_DEFRAG_BATCH);
329 sb_end_write(fs_info->sb);
330
331
332
333
334
335 if (num_defrag == BTRFS_DEFRAG_BATCH) {
336 defrag->last_offset = range.start;
337 btrfs_requeue_inode_defrag(inode, defrag);
338 } else if (defrag->last_offset && !defrag->cycled) {
339
340
341
342
343
344 defrag->last_offset = 0;
345 defrag->cycled = 1;
346 btrfs_requeue_inode_defrag(inode, defrag);
347 } else {
348 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
349 }
350
351 iput(inode);
352 return 0;
353cleanup:
354 srcu_read_unlock(&fs_info->subvol_srcu, index);
355 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
356 return ret;
357}
358
359
360
361
362
363int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
364{
365 struct inode_defrag *defrag;
366 u64 first_ino = 0;
367 u64 root_objectid = 0;
368
369 atomic_inc(&fs_info->defrag_running);
370 while (1) {
371
372 if (test_bit(BTRFS_FS_STATE_REMOUNTING,
373 &fs_info->fs_state))
374 break;
375
376 if (!__need_auto_defrag(fs_info->tree_root))
377 break;
378
379
380 defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
381 first_ino);
382 if (!defrag) {
383 if (root_objectid || first_ino) {
384 root_objectid = 0;
385 first_ino = 0;
386 continue;
387 } else {
388 break;
389 }
390 }
391
392 first_ino = defrag->ino + 1;
393 root_objectid = defrag->root;
394
395 __btrfs_run_defrag_inode(fs_info, defrag);
396 }
397 atomic_dec(&fs_info->defrag_running);
398
399
400
401
402
403 wake_up(&fs_info->transaction_wait);
404 return 0;
405}
406
407
408
409
410static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
411 struct page **prepared_pages,
412 struct iov_iter *i)
413{
414 size_t copied = 0;
415 size_t total_copied = 0;
416 int pg = 0;
417 int offset = pos & (PAGE_SIZE - 1);
418
419 while (write_bytes > 0) {
420 size_t count = min_t(size_t,
421 PAGE_SIZE - offset, write_bytes);
422 struct page *page = prepared_pages[pg];
423
424
425
426 copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
427
428
429 flush_dcache_page(page);
430
431
432
433
434
435
436
437
438
439
440 if (!PageUptodate(page) && copied < count)
441 copied = 0;
442
443 iov_iter_advance(i, copied);
444 write_bytes -= copied;
445 total_copied += copied;
446
447
448 if (unlikely(copied == 0))
449 break;
450
451 if (copied < PAGE_SIZE - offset) {
452 offset += copied;
453 } else {
454 pg++;
455 offset = 0;
456 }
457 }
458 return total_copied;
459}
460
461
462
463
464static void btrfs_drop_pages(struct page **pages, size_t num_pages)
465{
466 size_t i;
467 for (i = 0; i < num_pages; i++) {
468
469
470
471
472
473
474 ClearPageChecked(pages[i]);
475 unlock_page(pages[i]);
476 put_page(pages[i]);
477 }
478}
479
480
481
482
483
484
485
486
487
488int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
489 struct page **pages, size_t num_pages,
490 loff_t pos, size_t write_bytes,
491 struct extent_state **cached)
492{
493 int err = 0;
494 int i;
495 u64 num_bytes;
496 u64 start_pos;
497 u64 end_of_last_block;
498 u64 end_pos = pos + write_bytes;
499 loff_t isize = i_size_read(inode);
500
501 start_pos = pos & ~((u64)root->sectorsize - 1);
502 num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
503
504 end_of_last_block = start_pos + num_bytes - 1;
505 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
506 cached, 0);
507 if (err)
508 return err;
509
510 for (i = 0; i < num_pages; i++) {
511 struct page *p = pages[i];
512 SetPageUptodate(p);
513 ClearPageChecked(p);
514 set_page_dirty(p);
515 }
516
517
518
519
520
521
522 if (end_pos > isize)
523 i_size_write(inode, end_pos);
524 return 0;
525}
526
527
528
529
530
531void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
532 int skip_pinned)
533{
534 struct extent_map *em;
535 struct extent_map *split = NULL;
536 struct extent_map *split2 = NULL;
537 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
538 u64 len = end - start + 1;
539 u64 gen;
540 int ret;
541 int testend = 1;
542 unsigned long flags;
543 int compressed = 0;
544 bool modified;
545
546 WARN_ON(end < start);
547 if (end == (u64)-1) {
548 len = (u64)-1;
549 testend = 0;
550 }
551 while (1) {
552 int no_splits = 0;
553
554 modified = false;
555 if (!split)
556 split = alloc_extent_map();
557 if (!split2)
558 split2 = alloc_extent_map();
559 if (!split || !split2)
560 no_splits = 1;
561
562 write_lock(&em_tree->lock);
563 em = lookup_extent_mapping(em_tree, start, len);
564 if (!em) {
565 write_unlock(&em_tree->lock);
566 break;
567 }
568 flags = em->flags;
569 gen = em->generation;
570 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
571 if (testend && em->start + em->len >= start + len) {
572 free_extent_map(em);
573 write_unlock(&em_tree->lock);
574 break;
575 }
576 start = em->start + em->len;
577 if (testend)
578 len = start + len - (em->start + em->len);
579 free_extent_map(em);
580 write_unlock(&em_tree->lock);
581 continue;
582 }
583 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
584 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
585 clear_bit(EXTENT_FLAG_LOGGING, &flags);
586 modified = !list_empty(&em->list);
587 if (no_splits)
588 goto next;
589
590 if (em->start < start) {
591 split->start = em->start;
592 split->len = start - em->start;
593
594 if (em->block_start < EXTENT_MAP_LAST_BYTE) {
595 split->orig_start = em->orig_start;
596 split->block_start = em->block_start;
597
598 if (compressed)
599 split->block_len = em->block_len;
600 else
601 split->block_len = split->len;
602 split->orig_block_len = max(split->block_len,
603 em->orig_block_len);
604 split->ram_bytes = em->ram_bytes;
605 } else {
606 split->orig_start = split->start;
607 split->block_len = 0;
608 split->block_start = em->block_start;
609 split->orig_block_len = 0;
610 split->ram_bytes = split->len;
611 }
612
613 split->generation = gen;
614 split->bdev = em->bdev;
615 split->flags = flags;
616 split->compress_type = em->compress_type;
617 replace_extent_mapping(em_tree, em, split, modified);
618 free_extent_map(split);
619 split = split2;
620 split2 = NULL;
621 }
622 if (testend && em->start + em->len > start + len) {
623 u64 diff = start + len - em->start;
624
625 split->start = start + len;
626 split->len = em->start + em->len - (start + len);
627 split->bdev = em->bdev;
628 split->flags = flags;
629 split->compress_type = em->compress_type;
630 split->generation = gen;
631
632 if (em->block_start < EXTENT_MAP_LAST_BYTE) {
633 split->orig_block_len = max(em->block_len,
634 em->orig_block_len);
635
636 split->ram_bytes = em->ram_bytes;
637 if (compressed) {
638 split->block_len = em->block_len;
639 split->block_start = em->block_start;
640 split->orig_start = em->orig_start;
641 } else {
642 split->block_len = split->len;
643 split->block_start = em->block_start
644 + diff;
645 split->orig_start = em->orig_start;
646 }
647 } else {
648 split->ram_bytes = split->len;
649 split->orig_start = split->start;
650 split->block_len = 0;
651 split->block_start = em->block_start;
652 split->orig_block_len = 0;
653 }
654
655 if (extent_map_in_tree(em)) {
656 replace_extent_mapping(em_tree, em, split,
657 modified);
658 } else {
659 ret = add_extent_mapping(em_tree, split,
660 modified);
661 ASSERT(ret == 0);
662 }
663 free_extent_map(split);
664 split = NULL;
665 }
666next:
667 if (extent_map_in_tree(em))
668 remove_extent_mapping(em_tree, em);
669 write_unlock(&em_tree->lock);
670
671
672 free_extent_map(em);
673
674 free_extent_map(em);
675 }
676 if (split)
677 free_extent_map(split);
678 if (split2)
679 free_extent_map(split2);
680}
681
682
683
684
685
686
687
688
689
690
/*
 * Drop the file extent items of @inode that overlap the byte range
 * [@start, @end) (@end is exclusive).  Items fully inside the range are
 * deleted; items that straddle an edge of the range are trimmed or split.
 *
 * @path:             caller-supplied path used for all tree searches
 * @drop_end:         if non-NULL, set on return to min(@end, end of the last
 *                    extent examined) when an overlapping extent was found,
 *                    or to @end otherwise
 * @drop_cache:       if non-zero, the cached extent maps for the range are
 *                    dropped first
 * @replace_extent:   if non-zero, try to reserve room in the leaf for a new
 *                    EXTENT_DATA item keyed at @start before returning
 * @extent_item_size: data size of that new item
 * @key_inserted:     set to 1 when the item slot was reserved; read whenever
 *                    @replace_extent is non-zero, so it must be a valid
 *                    pointer in that case.  When it ends up set the path is
 *                    NOT released on return.
 *
 * Returns 0 on success, -EOPNOTSUPP when an inline extent would have to be
 * split, or a negative error from the tree operations.
 */
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 u64 *drop_end, int drop_cache,
			 int replace_extent,
			 u32 extent_item_size,
			 int *key_inserted)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 ino = btrfs_ino(inode);
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
	int recow;
	int ret;
	int modify_tree = -1;
	int update_refs;
	int found = 0;
	int leafs_visited = 0;

	if (drop_cache)
		btrfs_drop_extent_cache(inode, start, end - 1, 0);

	/*
	 * Range starts at or beyond the on-disk i_size and nothing is to be
	 * inserted: start with modify_tree == 0.  It is switched back to -1
	 * below as soon as an overlapping extent is actually found.
	 */
	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
		modify_tree = 0;

	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		       root == root->fs_info->tree_root);
	while (1) {
		recow = 0;
		ret = btrfs_lookup_file_extent(trans, root, path, ino,
					       search_start, modify_tree);
		if (ret < 0)
			break;
		/*
		 * No exact match on the first search: the previous item may
		 * be an extent of this inode that covers @start, so step
		 * back to it.
		 */
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		ret = 0;
		leafs_visited++;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
			}
			leafs_visited++;
			leaf = path->nodes[0];
			/* moved to another leaf: search again before modifying */
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid > ino)
			break;
		if (WARN_ON_ONCE(key.objectid < ino) ||
		    key.type < BTRFS_EXTENT_DATA_KEY) {
			ASSERT(del_nr == 0);
			path->slots[0]++;
			goto next_slot;
		}
		if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf,
						     path->slots[0], fi);
		} else {
			/* unknown extent type */
			BUG();
		}

		/*
		 * A zero-length extent item is deleted rather than skipped.
		 * NOTE(review): presumably so that the item inserted at the
		 * end of this function cannot collide with a stale item at
		 * the same key offset - confirm.
		 */
		if (extent_end == key.offset && extent_end >= search_start)
			goto delete_extent_item;

		/* extent ends before the range: not affected */
		if (extent_end <= search_start) {
			path->slots[0]++;
			goto next_slot;
		}

		found = 1;
		search_start = max(key.offset, start);
		if (recow || !modify_tree) {
			modify_tree = -1;
			btrfs_release_path(path);
			continue;
		}

		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 *
		 * Split the extent at @start; the tail half is then handled
		 * by the next case.
		 */
		if (start > key.offset && end < extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(path);
				continue;
			}
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			/* two items now point at the same disk extent */
			if (update_refs && disk_bytenr > 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset);
				BUG_ON(ret);
			}
			key.offset = start;
		}

		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 *
		 * Trim the front of the extent: move its key to @end.
		 */
		if (start <= key.offset && end < extent_end) {
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(root->fs_info, path, &new_key);

			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, end - key.offset);
			break;
		}

		search_start = extent_end;

		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 *
		 * Trim the tail of the extent: shorten it to end at @start.
		 */
		if (start > key.offset && end >= extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, extent_end - start);
			if (end == extent_end)
				break;

			path->slots[0]++;
			goto next_slot;
		}

		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 *
		 * The whole extent goes away.  Deletions of adjacent slots
		 * are batched in [del_slot, del_slot + del_nr).
		 */
		if (start <= key.offset && end >= extent_end) {
delete_extent_item:
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}

			if (update_refs &&
			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
				inode_sub_bytes(inode,
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   root->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
				ret = btrfs_free_extent(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
						extent_offset);
				BUG_ON(ret);
				inode_sub_bytes(inode,
						extent_end - key.offset);
			}

			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			/* end of leaf: flush the batched deletions now */
			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(path);
			continue;
		}

		/* the four cases above cover every possible overlap */
		BUG_ON(1);
	}

	if (!ret && del_nr > 0) {
		/*
		 * Point the path at the first deleted slot before deleting,
		 * so that path->slots[0] is where a replacement item would
		 * be inserted below.
		 */
		path->slots[0] = del_slot;
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			btrfs_abort_transaction(trans, ret);
	}

	leaf = path->nodes[0];

	/*
	 * Reserve the slot for the replacement item only if a single leaf
	 * was visited, that leaf is still write locked and it has enough
	 * free space for the new item.
	 */
	if (!ret && replace_extent && leafs_visited == 1 &&
	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
	     path->locks[0] == BTRFS_WRITE_LOCK) &&
	    btrfs_leaf_free_space(root, leaf) >=
	    sizeof(struct btrfs_item) + extent_item_size) {

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = start;
		/* nothing deleted: insert after the slot if its key is smaller */
		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
			struct btrfs_key slot_key;

			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
				path->slots[0]++;
		}
		setup_items_for_insert(root, path, &key,
				       &extent_item_size,
				       extent_item_size,
				       sizeof(struct btrfs_item) +
				       extent_item_size, 1);
		*key_inserted = 1;
	}

	/* keep the path only when the caller still has to fill the new item */
	if (!replace_extent || !(*key_inserted))
		btrfs_release_path(path);
	if (drop_end)
		*drop_end = found ? min(end, extent_end) : end;
	return ret;
}
1016
1017int btrfs_drop_extents(struct btrfs_trans_handle *trans,
1018 struct btrfs_root *root, struct inode *inode, u64 start,
1019 u64 end, int drop_cache)
1020{
1021 struct btrfs_path *path;
1022 int ret;
1023
1024 path = btrfs_alloc_path();
1025 if (!path)
1026 return -ENOMEM;
1027 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
1028 drop_cache, 0, 0, NULL);
1029 btrfs_free_path(path);
1030 return ret;
1031}
1032
1033static int extent_mergeable(struct extent_buffer *leaf, int slot,
1034 u64 objectid, u64 bytenr, u64 orig_offset,
1035 u64 *start, u64 *end)
1036{
1037 struct btrfs_file_extent_item *fi;
1038 struct btrfs_key key;
1039 u64 extent_end;
1040
1041 if (slot < 0 || slot >= btrfs_header_nritems(leaf))
1042 return 0;
1043
1044 btrfs_item_key_to_cpu(leaf, &key, slot);
1045 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
1046 return 0;
1047
1048 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
1049 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
1050 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
1051 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
1052 btrfs_file_extent_compression(leaf, fi) ||
1053 btrfs_file_extent_encryption(leaf, fi) ||
1054 btrfs_file_extent_other_encoding(leaf, fi))
1055 return 0;
1056
1057 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
1058 if ((*start && *start != key.offset) || (*end && *end != extent_end))
1059 return 0;
1060
1061 *start = key.offset;
1062 *end = extent_end;
1063 return 1;
1064}
1065
1066
1067
1068
1069
1070
1071
1072
1073int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
1074 struct inode *inode, u64 start, u64 end)
1075{
1076 struct btrfs_root *root = BTRFS_I(inode)->root;
1077 struct extent_buffer *leaf;
1078 struct btrfs_path *path;
1079 struct btrfs_file_extent_item *fi;
1080 struct btrfs_key key;
1081 struct btrfs_key new_key;
1082 u64 bytenr;
1083 u64 num_bytes;
1084 u64 extent_end;
1085 u64 orig_offset;
1086 u64 other_start;
1087 u64 other_end;
1088 u64 split;
1089 int del_nr = 0;
1090 int del_slot = 0;
1091 int recow;
1092 int ret;
1093 u64 ino = btrfs_ino(inode);
1094
1095 path = btrfs_alloc_path();
1096 if (!path)
1097 return -ENOMEM;
1098again:
1099 recow = 0;
1100 split = start;
1101 key.objectid = ino;
1102 key.type = BTRFS_EXTENT_DATA_KEY;
1103 key.offset = split;
1104
1105 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1106 if (ret < 0)
1107 goto out;
1108 if (ret > 0 && path->slots[0] > 0)
1109 path->slots[0]--;
1110
1111 leaf = path->nodes[0];
1112 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1113 if (key.objectid != ino ||
1114 key.type != BTRFS_EXTENT_DATA_KEY) {
1115 ret = -EINVAL;
1116 btrfs_abort_transaction(trans, ret);
1117 goto out;
1118 }
1119 fi = btrfs_item_ptr(leaf, path->slots[0],
1120 struct btrfs_file_extent_item);
1121 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
1122 ret = -EINVAL;
1123 btrfs_abort_transaction(trans, ret);
1124 goto out;
1125 }
1126 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
1127 if (key.offset > start || extent_end < end) {
1128 ret = -EINVAL;
1129 btrfs_abort_transaction(trans, ret);
1130 goto out;
1131 }
1132
1133 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1134 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1135 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
1136 memcpy(&new_key, &key, sizeof(new_key));
1137
1138 if (start == key.offset && end < extent_end) {
1139 other_start = 0;
1140 other_end = start;
1141 if (extent_mergeable(leaf, path->slots[0] - 1,
1142 ino, bytenr, orig_offset,
1143 &other_start, &other_end)) {
1144 new_key.offset = end;
1145 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
1146 fi = btrfs_item_ptr(leaf, path->slots[0],
1147 struct btrfs_file_extent_item);
1148 btrfs_set_file_extent_generation(leaf, fi,
1149 trans->transid);
1150 btrfs_set_file_extent_num_bytes(leaf, fi,
1151 extent_end - end);
1152 btrfs_set_file_extent_offset(leaf, fi,
1153 end - orig_offset);
1154 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
1155 struct btrfs_file_extent_item);
1156 btrfs_set_file_extent_generation(leaf, fi,
1157 trans->transid);
1158 btrfs_set_file_extent_num_bytes(leaf, fi,
1159 end - other_start);
1160 btrfs_mark_buffer_dirty(leaf);
1161 goto out;
1162 }
1163 }
1164
1165 if (start > key.offset && end == extent_end) {
1166 other_start = end;
1167 other_end = 0;
1168 if (extent_mergeable(leaf, path->slots[0] + 1,
1169 ino, bytenr, orig_offset,
1170 &other_start, &other_end)) {
1171 fi = btrfs_item_ptr(leaf, path->slots[0],
1172 struct btrfs_file_extent_item);
1173 btrfs_set_file_extent_num_bytes(leaf, fi,
1174 start - key.offset);
1175 btrfs_set_file_extent_generation(leaf, fi,
1176 trans->transid);
1177 path->slots[0]++;
1178 new_key.offset = start;
1179 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
1180
1181 fi = btrfs_item_ptr(leaf, path->slots[0],
1182 struct btrfs_file_extent_item);
1183 btrfs_set_file_extent_generation(leaf, fi,
1184 trans->transid);
1185 btrfs_set_file_extent_num_bytes(leaf, fi,
1186 other_end - start);
1187 btrfs_set_file_extent_offset(leaf, fi,
1188 start - orig_offset);
1189 btrfs_mark_buffer_dirty(leaf);
1190 goto out;
1191 }
1192 }
1193
1194 while (start > key.offset || end < extent_end) {
1195 if (key.offset == start)
1196 split = end;
1197
1198 new_key.offset = split;
1199 ret = btrfs_duplicate_item(trans, root, path, &new_key);
1200 if (ret == -EAGAIN) {
1201 btrfs_release_path(path);
1202 goto again;
1203 }
1204 if (ret < 0) {
1205 btrfs_abort_transaction(trans, ret);
1206 goto out;
1207 }
1208
1209 leaf = path->nodes[0];
1210 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
1211 struct btrfs_file_extent_item);
1212 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1213 btrfs_set_file_extent_num_bytes(leaf, fi,
1214 split - key.offset);
1215
1216 fi = btrfs_item_ptr(leaf, path->slots[0],
1217 struct btrfs_file_extent_item);
1218
1219 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1220 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
1221 btrfs_set_file_extent_num_bytes(leaf, fi,
1222 extent_end - split);
1223 btrfs_mark_buffer_dirty(leaf);
1224
1225 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
1226 root->root_key.objectid,
1227 ino, orig_offset);
1228 if (ret) {
1229 btrfs_abort_transaction(trans, ret);
1230 goto out;
1231 }
1232
1233 if (split == start) {
1234 key.offset = start;
1235 } else {
1236 if (start != key.offset) {
1237 ret = -EINVAL;
1238 btrfs_abort_transaction(trans, ret);
1239 goto out;
1240 }
1241 path->slots[0]--;
1242 extent_end = end;
1243 }
1244 recow = 1;
1245 }
1246
1247 other_start = end;
1248 other_end = 0;
1249 if (extent_mergeable(leaf, path->slots[0] + 1,
1250 ino, bytenr, orig_offset,
1251 &other_start, &other_end)) {
1252 if (recow) {
1253 btrfs_release_path(path);
1254 goto again;
1255 }
1256 extent_end = other_end;
1257 del_slot = path->slots[0] + 1;
1258 del_nr++;
1259 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1260 0, root->root_key.objectid,
1261 ino, orig_offset);
1262 if (ret) {
1263 btrfs_abort_transaction(trans, ret);
1264 goto out;
1265 }
1266 }
1267 other_start = 0;
1268 other_end = start;
1269 if (extent_mergeable(leaf, path->slots[0] - 1,
1270 ino, bytenr, orig_offset,
1271 &other_start, &other_end)) {
1272 if (recow) {
1273 btrfs_release_path(path);
1274 goto again;
1275 }
1276 key.offset = other_start;
1277 del_slot = path->slots[0];
1278 del_nr++;
1279 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1280 0, root->root_key.objectid,
1281 ino, orig_offset);
1282 if (ret) {
1283 btrfs_abort_transaction(trans, ret);
1284 goto out;
1285 }
1286 }
1287 if (del_nr == 0) {
1288 fi = btrfs_item_ptr(leaf, path->slots[0],
1289 struct btrfs_file_extent_item);
1290 btrfs_set_file_extent_type(leaf, fi,
1291 BTRFS_FILE_EXTENT_REG);
1292 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1293 btrfs_mark_buffer_dirty(leaf);
1294 } else {
1295 fi = btrfs_item_ptr(leaf, del_slot - 1,
1296 struct btrfs_file_extent_item);
1297 btrfs_set_file_extent_type(leaf, fi,
1298 BTRFS_FILE_EXTENT_REG);
1299 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1300 btrfs_set_file_extent_num_bytes(leaf, fi,
1301 extent_end - key.offset);
1302 btrfs_mark_buffer_dirty(leaf);
1303
1304 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
1305 if (ret < 0) {
1306 btrfs_abort_transaction(trans, ret);
1307 goto out;
1308 }
1309 }
1310out:
1311 btrfs_free_path(path);
1312 return 0;
1313}
1314
1315
1316
1317
1318
1319static int prepare_uptodate_page(struct inode *inode,
1320 struct page *page, u64 pos,
1321 bool force_uptodate)
1322{
1323 int ret = 0;
1324
1325 if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
1326 !PageUptodate(page)) {
1327 ret = btrfs_readpage(NULL, page);
1328 if (ret)
1329 return ret;
1330 lock_page(page);
1331 if (!PageUptodate(page)) {
1332 unlock_page(page);
1333 return -EIO;
1334 }
1335 if (page->mapping != inode->i_mapping) {
1336 unlock_page(page);
1337 return -EAGAIN;
1338 }
1339 }
1340 return 0;
1341}
1342
1343
1344
1345
1346static noinline int prepare_pages(struct inode *inode, struct page **pages,
1347 size_t num_pages, loff_t pos,
1348 size_t write_bytes, bool force_uptodate)
1349{
1350 int i;
1351 unsigned long index = pos >> PAGE_SHIFT;
1352 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
1353 int err = 0;
1354 int faili;
1355
1356 for (i = 0; i < num_pages; i++) {
1357again:
1358 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1359 mask | __GFP_WRITE);
1360 if (!pages[i]) {
1361 faili = i - 1;
1362 err = -ENOMEM;
1363 goto fail;
1364 }
1365
1366 if (i == 0)
1367 err = prepare_uptodate_page(inode, pages[i], pos,
1368 force_uptodate);
1369 if (!err && i == num_pages - 1)
1370 err = prepare_uptodate_page(inode, pages[i],
1371 pos + write_bytes, false);
1372 if (err) {
1373 put_page(pages[i]);
1374 if (err == -EAGAIN) {
1375 err = 0;
1376 goto again;
1377 }
1378 faili = i - 1;
1379 goto fail;
1380 }
1381 wait_on_page_writeback(pages[i]);
1382 }
1383
1384 return 0;
1385fail:
1386 while (faili >= 0) {
1387 unlock_page(pages[faili]);
1388 put_page(pages[faili]);
1389 faili--;
1390 }
1391 return err;
1392
1393}
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405static noinline int
1406lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
1407 size_t num_pages, loff_t pos,
1408 size_t write_bytes,
1409 u64 *lockstart, u64 *lockend,
1410 struct extent_state **cached_state)
1411{
1412 struct btrfs_root *root = BTRFS_I(inode)->root;
1413 u64 start_pos;
1414 u64 last_pos;
1415 int i;
1416 int ret = 0;
1417
1418 start_pos = round_down(pos, root->sectorsize);
1419 last_pos = start_pos
1420 + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
1421
1422 if (start_pos < inode->i_size) {
1423 struct btrfs_ordered_extent *ordered;
1424 lock_extent_bits(&BTRFS_I(inode)->io_tree,
1425 start_pos, last_pos, cached_state);
1426 ordered = btrfs_lookup_ordered_range(inode, start_pos,
1427 last_pos - start_pos + 1);
1428 if (ordered &&
1429 ordered->file_offset + ordered->len > start_pos &&
1430 ordered->file_offset <= last_pos) {
1431 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1432 start_pos, last_pos,
1433 cached_state, GFP_NOFS);
1434 for (i = 0; i < num_pages; i++) {
1435 unlock_page(pages[i]);
1436 put_page(pages[i]);
1437 }
1438 btrfs_start_ordered_extent(inode, ordered, 1);
1439 btrfs_put_ordered_extent(ordered);
1440 return -EAGAIN;
1441 }
1442 if (ordered)
1443 btrfs_put_ordered_extent(ordered);
1444
1445 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
1446 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
1447 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
1448 0, 0, cached_state, GFP_NOFS);
1449 *lockstart = start_pos;
1450 *lockend = last_pos;
1451 ret = 1;
1452 }
1453
1454 for (i = 0; i < num_pages; i++) {
1455 if (clear_page_dirty_for_io(pages[i]))
1456 account_page_redirty(pages[i]);
1457 set_page_extent_mapped(pages[i]);
1458 WARN_ON(!PageLocked(pages[i]));
1459 }
1460
1461 return ret;
1462}
1463
1464static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1465 size_t *write_bytes)
1466{
1467 struct btrfs_root *root = BTRFS_I(inode)->root;
1468 struct btrfs_ordered_extent *ordered;
1469 u64 lockstart, lockend;
1470 u64 num_bytes;
1471 int ret;
1472
1473 ret = btrfs_start_write_no_snapshoting(root);
1474 if (!ret)
1475 return -ENOSPC;
1476
1477 lockstart = round_down(pos, root->sectorsize);
1478 lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
1479
1480 while (1) {
1481 lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1482 ordered = btrfs_lookup_ordered_range(inode, lockstart,
1483 lockend - lockstart + 1);
1484 if (!ordered) {
1485 break;
1486 }
1487 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1488 btrfs_start_ordered_extent(inode, ordered, 1);
1489 btrfs_put_ordered_extent(ordered);
1490 }
1491
1492 num_bytes = lockend - lockstart + 1;
1493 ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
1494 if (ret <= 0) {
1495 ret = 0;
1496 btrfs_end_write_no_snapshoting(root);
1497 } else {
1498 *write_bytes = min_t(size_t, *write_bytes ,
1499 num_bytes - pos + lockstart);
1500 }
1501
1502 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1503
1504 return ret;
1505}
1506
/*
 * Copy the data described by @i into the page cache of @file, starting at
 * file offset @pos, one batch of pages at a time.
 *
 * For every batch, data and metadata space is reserved, the pages are
 * prepared and locked, the user data is copied and the copied part is marked
 * dirty.  When no data space is available and the inode is NODATACOW or
 * PREALLOC, the batch falls back to a NOCOW write for which only metadata is
 * reserved.
 *
 * Returns the number of bytes written if any were written, otherwise the
 * error (or 0) of the last step that ran.
 */
static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	struct extent_state *cached_state = NULL;
	/* reservation still owned by this function; released at the end */
	u64 release_bytes = 0;
	u64 lockstart;
	u64 lockend;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	/* true while the current batch is a NOCOW write without data space */
	bool only_release_metadata = false;
	bool force_page_uptodate = false;
	bool need_unlock;

	/*
	 * Size the page pointer array: no more than fits in one page and no
	 * more than the task's remaining dirty budget, but at least 8.
	 */
	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
			PAGE_SIZE / (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	while (iov_iter_count(i) > 0) {
		/* offset of @pos inside its page */
		size_t offset = pos & (PAGE_SIZE - 1);
		size_t sector_offset;
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_SIZE -
					 offset);
		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
						PAGE_SIZE);
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault in the user buffer before any page is locked.
		 * NOTE(review): presumably this avoids faulting on the source
		 * while holding page locks - confirm.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		sector_offset = pos & (root->sectorsize - 1);
		reserve_bytes = round_up(write_bytes + sector_offset,
				root->sectorsize);

		ret = btrfs_check_data_free_space(inode, pos, write_bytes);
		if (ret < 0) {
			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
						      BTRFS_INODE_PREALLOC)) &&
			    check_can_nocow(inode, pos, &write_bytes) > 0) {
				/*
				 * No data space was reserved for this batch,
				 * so only metadata must be released later.
				 */
				only_release_metadata = true;
				/*
				 * check_can_nocow() may have shrunk
				 * write_bytes; recompute what depends on it.
				 */
				num_pages = DIV_ROUND_UP(write_bytes + offset,
							 PAGE_SIZE);
				reserve_bytes = round_up(write_bytes +
							 sector_offset,
							 root->sectorsize);
			} else {
				break;
			}
		}

		ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
		if (ret) {
			/* undo whichever of the two steps above succeeded */
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode, pos,
							       write_bytes);
			else
				btrfs_end_write_no_snapshoting(root);
			break;
		}

		release_bytes = reserve_bytes;
		need_unlock = false;
again:
		/*
		 * Grab and lock the pages.  We come back here when
		 * lock_and_cleanup_extent_if_need() released them again
		 * because of an overlapping ordered extent.
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret)
			break;

		ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
						pos, write_bytes, &lockstart,
						&lockend, &cached_state);
		if (ret < 0) {
			if (ret == -EAGAIN)
				goto again;
			break;
		} else if (ret > 0) {
			/* extent range is locked, unlock it after dirtying */
			need_unlock = true;
			ret = 0;
		}

		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);

		num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
						reserve_bytes);
		dirty_sectors = round_up(copied + sector_offset,
					root->sectorsize);
		dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
						dirty_sectors);

		/*
		 * After a short copy continue with a single page per
		 * iteration.
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			/* nothing copied: retry with the page read in first */
			force_page_uptodate = true;
			dirty_sectors = 0;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = DIV_ROUND_UP(copied + offset,
						   PAGE_SIZE);
		}

		/*
		 * Fewer sectors were dirtied than reserved: give back the
		 * excess reservation.
		 * NOTE(review): the outstanding_extents increment presumably
		 * compensates for a decrement done by the release helpers
		 * while the copied part still needs its extent - confirm.
		 */
		if (num_sectors > dirty_sectors) {

			/* keep only the part that covers the dirtied sectors */
			release_bytes -= dirty_sectors <<
				root->fs_info->sb->s_blocksize_bits;

			if (copied > 0) {
				spin_lock(&BTRFS_I(inode)->lock);
				BTRFS_I(inode)->outstanding_extents++;
				spin_unlock(&BTRFS_I(inode)->lock);
			}
			if (only_release_metadata) {
				btrfs_delalloc_release_metadata(inode,
								release_bytes);
			} else {
				u64 __pos;

				__pos = round_down(pos, root->sectorsize) +
					(dirty_pages << PAGE_SHIFT);
				btrfs_delalloc_release_space(inode, __pos,
							     release_bytes);
			}
		}

		/* what is still reserved now covers exactly the copied data */
		release_bytes = round_up(copied + sector_offset,
					root->sectorsize);

		if (copied > 0)
			ret = btrfs_dirty_pages(root, inode, pages,
						dirty_pages, pos, copied,
						NULL);
		if (need_unlock)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state,
					     GFP_NOFS);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		/* the reservation now belongs to the dirtied range */
		release_bytes = 0;
		if (only_release_metadata)
			btrfs_end_write_no_snapshoting(root);

		if (only_release_metadata && copied > 0) {
			/* tag the written range as having no data reservation */
			lockstart = round_down(pos, root->sectorsize);
			lockend = round_up(pos + copied, root->sectorsize) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
		if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1)
			btrfs_btree_balance_dirty(root);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	/* the loop was left with a reservation that was never handed over */
	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_end_write_no_snapshoting(root);
			btrfs_delalloc_release_metadata(inode, release_bytes);
		} else {
			btrfs_delalloc_release_space(inode,
						round_down(pos, root->sectorsize),
						release_bytes);
		}
	}

	return num_written ? num_written : ret;
}
1737
1738static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
1739{
1740 struct file *file = iocb->ki_filp;
1741 struct inode *inode = file_inode(file);
1742 loff_t pos = iocb->ki_pos;
1743 ssize_t written;
1744 ssize_t written_buffered;
1745 loff_t endbyte;
1746 int err;
1747
1748 written = generic_file_direct_write(iocb, from);
1749
1750 if (written < 0 || !iov_iter_count(from))
1751 return written;
1752
1753 pos += written;
1754 written_buffered = __btrfs_buffered_write(file, from, pos);
1755 if (written_buffered < 0) {
1756 err = written_buffered;
1757 goto out;
1758 }
1759
1760
1761
1762
1763 endbyte = pos + written_buffered - 1;
1764 err = btrfs_fdatawrite_range(inode, pos, endbyte);
1765 if (err)
1766 goto out;
1767 err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
1768 if (err)
1769 goto out;
1770 written += written_buffered;
1771 iocb->ki_pos = pos + written_buffered;
1772 invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
1773 endbyte >> PAGE_SHIFT);
1774out:
1775 return written ? written : err;
1776}
1777
1778static void update_time_for_write(struct inode *inode)
1779{
1780 struct timespec now;
1781
1782 if (IS_NOCMTIME(inode))
1783 return;
1784
1785 now = current_time(inode);
1786 if (!timespec_equal(&inode->i_mtime, &now))
1787 inode->i_mtime = now;
1788
1789 if (!timespec_equal(&inode->i_ctime, &now))
1790 inode->i_ctime = now;
1791
1792 if (IS_I_VERSION(inode))
1793 inode_inc_iversion(inode);
1794}
1795
1796static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1797 struct iov_iter *from)
1798{
1799 struct file *file = iocb->ki_filp;
1800 struct inode *inode = file_inode(file);
1801 struct btrfs_root *root = BTRFS_I(inode)->root;
1802 u64 start_pos;
1803 u64 end_pos;
1804 ssize_t num_written = 0;
1805 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1806 ssize_t err;
1807 loff_t pos;
1808 size_t count;
1809 loff_t oldsize;
1810 int clean_page = 0;
1811
1812 inode_lock(inode);
1813 err = generic_write_checks(iocb, from);
1814 if (err <= 0) {
1815 inode_unlock(inode);
1816 return err;
1817 }
1818
1819 current->backing_dev_info = inode_to_bdi(inode);
1820 err = file_remove_privs(file);
1821 if (err) {
1822 inode_unlock(inode);
1823 goto out;
1824 }
1825
1826
1827
1828
1829
1830
1831
1832 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1833 inode_unlock(inode);
1834 err = -EROFS;
1835 goto out;
1836 }
1837
1838
1839
1840
1841
1842
1843
1844 update_time_for_write(inode);
1845
1846 pos = iocb->ki_pos;
1847 count = iov_iter_count(from);
1848 start_pos = round_down(pos, root->sectorsize);
1849 oldsize = i_size_read(inode);
1850 if (start_pos > oldsize) {
1851
1852 end_pos = round_up(pos + count, root->sectorsize);
1853 err = btrfs_cont_expand(inode, oldsize, end_pos);
1854 if (err) {
1855 inode_unlock(inode);
1856 goto out;
1857 }
1858 if (start_pos > round_up(oldsize, root->sectorsize))
1859 clean_page = 1;
1860 }
1861
1862 if (sync)
1863 atomic_inc(&BTRFS_I(inode)->sync_writers);
1864
1865 if (iocb->ki_flags & IOCB_DIRECT) {
1866 num_written = __btrfs_direct_write(iocb, from);
1867 } else {
1868 num_written = __btrfs_buffered_write(file, from, pos);
1869 if (num_written > 0)
1870 iocb->ki_pos = pos + num_written;
1871 if (clean_page)
1872 pagecache_isize_extended(inode, oldsize,
1873 i_size_read(inode));
1874 }
1875
1876 inode_unlock(inode);
1877
1878
1879
1880
1881
1882
1883 spin_lock(&BTRFS_I(inode)->lock);
1884 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1885 spin_unlock(&BTRFS_I(inode)->lock);
1886 if (num_written > 0)
1887 num_written = generic_write_sync(iocb, num_written);
1888
1889 if (sync)
1890 atomic_dec(&BTRFS_I(inode)->sync_writers);
1891out:
1892 current->backing_dev_info = NULL;
1893 return num_written ? num_written : err;
1894}
1895
1896int btrfs_release_file(struct inode *inode, struct file *filp)
1897{
1898 if (filp->private_data)
1899 btrfs_ioctl_trans_end(filp);
1900
1901
1902
1903
1904
1905
1906 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1907 &BTRFS_I(inode)->runtime_flags))
1908 filemap_flush(inode->i_mapping);
1909 return 0;
1910}
1911
1912static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
1913{
1914 int ret;
1915
1916 atomic_inc(&BTRFS_I(inode)->sync_writers);
1917 ret = btrfs_fdatawrite_range(inode, start, end);
1918 atomic_dec(&BTRFS_I(inode)->sync_writers);
1919
1920 return ret;
1921}
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
/*
 * fsync for a btrfs file: sync the byte range [@start, @end] of @file.
 *
 * The data is written out first, then the inode is logged to the tree log
 * and the log is synced.  When logging or syncing the log fails, the whole
 * transaction is committed instead.
 *
 * @datasync is only passed to the tracepoint here.
 *
 * Returns 0 on success or a negative errno; any positive internal result is
 * reported as -EIO.
 */
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_log_ctx ctx;
	int ret = 0;
	bool full_sync = 0;
	u64 len;

	/*
	 * Compute the length in u64: with the casts the subtraction cannot
	 * overflow a signed type, e.g. for the range [0, LLONG_MAX].
	 */
	len = (u64)end - (u64)start + 1;
	trace_btrfs_sync_file(file, datasync);

	/*
	 * Kick off writeback of the range before taking the inode lock, so
	 * this part runs without holding it.
	 */
	ret = start_ordered_ops(inode, start, end);
	if (ret)
		return ret;

	inode_lock(inode);
	atomic_inc(&root->log_batch);
	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags);
	/*
	 * More pages may have been dirtied between the first
	 * start_ordered_ops() call and taking the inode lock.
	 */
	if (full_sync) {
		/*
		 * Full sync: wait until all ordered extents in the range
		 * have completed before the inode is logged.
		 */
		ret = btrfs_wait_ordered_range(inode, start, len);
	} else {
		/*
		 * Fast path: only start writeback again without waiting for
		 * it here.  If logging does not succeed further below, the
		 * ordered range is waited for before the transaction commit.
		 */
		ret = start_ordered_ops(inode, start, end);
	}
	if (ret) {
		inode_unlock(inode);
		goto out;
	}
	atomic_inc(&root->log_batch);

	/*
	 * Nothing has to be logged when the inode is already in the log for
	 * the current generation, or when its last change is part of an
	 * already committed transaction (in the non-full-sync case only if
	 * no ordered extents remain in the range).
	 * NOTE(review): the barrier presumably pairs with the writers of
	 * last_trans / the logged state - confirm.
	 */
	smp_mb();
	if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
	    (full_sync && BTRFS_I(inode)->last_trans <=
	     root->fs_info->last_trans_committed) ||
	    (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
	     BTRFS_I(inode)->last_trans
	     <= root->fs_info->last_trans_committed)) {
		/* everything is persisted, a full sync is no longer needed */
		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			  &BTRFS_I(inode)->runtime_flags);
		/* still report writeback errors recorded on the mapping */
		ret = filemap_check_errors(inode->i_mapping);
		inode_unlock(inode);
		goto out;
	}

	/* end a transaction started through the ioctl interface first */
	if (file->private_data)
		btrfs_ioctl_trans_end(file);

	/* join or start a transaction without reserving any items */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		inode_unlock(inode);
		goto out;
	}
	trans->sync = true;

	btrfs_init_log_ctx(&ctx, inode);

	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
	if (ret < 0) {
		/* logging failed: fall through to the transaction commit */
		ret = 1;
	}

	/* the inode lock is not held while the log is synced or committed */
	inode_unlock(inode);

	/* an I/O error recorded in the log context wins over everything */
	if (ctx.io_err) {
		btrfs_end_transaction(trans, root);
		ret = ctx.io_err;
		goto out;
	}

	if (ret != BTRFS_NO_LOG_SYNC) {
		if (!ret) {
			ret = btrfs_sync_log(trans, root, &ctx);
			if (!ret) {
				ret = btrfs_end_transaction(trans, root);
				goto out;
			}
		}
		/* logging or the log sync failed: commit the transaction */
		if (!full_sync) {
			ret = btrfs_wait_ordered_range(inode, start, len);
			if (ret) {
				btrfs_end_transaction(trans, root);
				goto out;
			}
		}
		ret = btrfs_commit_transaction(trans, root);
	} else {
		ret = btrfs_end_transaction(trans, root);
	}
out:
	return ret > 0 ? -EIO : ret;
}
2159
/*
 * VM operations installed by btrfs_file_mmap(): the generic filemap fault
 * and map_pages handlers plus the btrfs specific page_mkwrite handler.
 */
static const struct vm_operations_struct btrfs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = btrfs_page_mkwrite,
};
2165
2166static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
2167{
2168 struct address_space *mapping = filp->f_mapping;
2169
2170 if (!mapping->a_ops->readpage)
2171 return -ENOEXEC;
2172
2173 file_accessed(filp);
2174 vma->vm_ops = &btrfs_file_vm_ops;
2175
2176 return 0;
2177}
2178
2179static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
2180 int slot, u64 start, u64 end)
2181{
2182 struct btrfs_file_extent_item *fi;
2183 struct btrfs_key key;
2184
2185 if (slot < 0 || slot >= btrfs_header_nritems(leaf))
2186 return 0;
2187
2188 btrfs_item_key_to_cpu(leaf, &key, slot);
2189 if (key.objectid != btrfs_ino(inode) ||
2190 key.type != BTRFS_EXTENT_DATA_KEY)
2191 return 0;
2192
2193 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
2194
2195 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2196 return 0;
2197
2198 if (btrfs_file_extent_disk_bytenr(leaf, fi))
2199 return 0;
2200
2201 if (key.offset == end)
2202 return 1;
2203 if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
2204 return 1;
2205 return 0;
2206}
2207
2208static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2209 struct btrfs_path *path, u64 offset, u64 end)
2210{
2211 struct btrfs_root *root = BTRFS_I(inode)->root;
2212 struct extent_buffer *leaf;
2213 struct btrfs_file_extent_item *fi;
2214 struct extent_map *hole_em;
2215 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2216 struct btrfs_key key;
2217 int ret;
2218
2219 if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
2220 goto out;
2221
2222 key.objectid = btrfs_ino(inode);
2223 key.type = BTRFS_EXTENT_DATA_KEY;
2224 key.offset = offset;
2225
2226 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2227 if (ret < 0)
2228 return ret;
2229 BUG_ON(!ret);
2230
2231 leaf = path->nodes[0];
2232 if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
2233 u64 num_bytes;
2234
2235 path->slots[0]--;
2236 fi = btrfs_item_ptr(leaf, path->slots[0],
2237 struct btrfs_file_extent_item);
2238 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
2239 end - offset;
2240 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2241 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
2242 btrfs_set_file_extent_offset(leaf, fi, 0);
2243 btrfs_mark_buffer_dirty(leaf);
2244 goto out;
2245 }
2246
2247 if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
2248 u64 num_bytes;
2249
2250 key.offset = offset;
2251 btrfs_set_item_key_safe(root->fs_info, path, &key);
2252 fi = btrfs_item_ptr(leaf, path->slots[0],
2253 struct btrfs_file_extent_item);
2254 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
2255 offset;
2256 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2257 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
2258 btrfs_set_file_extent_offset(leaf, fi, 0);
2259 btrfs_mark_buffer_dirty(leaf);
2260 goto out;
2261 }
2262 btrfs_release_path(path);
2263
2264 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
2265 0, 0, end - offset, 0, end - offset,
2266 0, 0, 0);
2267 if (ret)
2268 return ret;
2269
2270out:
2271 btrfs_release_path(path);
2272
2273 hole_em = alloc_extent_map();
2274 if (!hole_em) {
2275 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
2276 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2277 &BTRFS_I(inode)->runtime_flags);
2278 } else {
2279 hole_em->start = offset;
2280 hole_em->len = end - offset;
2281 hole_em->ram_bytes = hole_em->len;
2282 hole_em->orig_start = offset;
2283
2284 hole_em->block_start = EXTENT_MAP_HOLE;
2285 hole_em->block_len = 0;
2286 hole_em->orig_block_len = 0;
2287 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
2288 hole_em->compress_type = BTRFS_COMPRESS_NONE;
2289 hole_em->generation = trans->transid;
2290
2291 do {
2292 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
2293 write_lock(&em_tree->lock);
2294 ret = add_extent_mapping(em_tree, hole_em, 1);
2295 write_unlock(&em_tree->lock);
2296 } while (ret == -EEXIST);
2297 free_extent_map(hole_em);
2298 if (ret)
2299 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2300 &BTRFS_I(inode)->runtime_flags);
2301 }
2302
2303 return 0;
2304}
2305
2306
2307
2308
2309
2310
2311
2312static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
2313{
2314 struct extent_map *em;
2315 int ret = 0;
2316
2317 em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
2318 if (IS_ERR_OR_NULL(em)) {
2319 if (!em)
2320 ret = -ENOMEM;
2321 else
2322 ret = PTR_ERR(em);
2323 return ret;
2324 }
2325
2326
2327 if (em->block_start == EXTENT_MAP_HOLE) {
2328 ret = 1;
2329 *len = em->start + em->len > *start + *len ?
2330 0 : *start + *len - em->start - em->len;
2331 *start = em->start + em->len;
2332 }
2333 free_extent_map(em);
2334 return ret;
2335}
2336
/*
 * Punch a hole of @len bytes at @offset into @inode.
 *
 * Partial blocks at the head and tail of the range are zeroed with
 * btrfs_truncate_block(); the sector-aligned middle part is locked, its
 * extents are dropped and, for the part below i_size, replaced by hole
 * extents through fill_holes().
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_block_rsv *rsv;
	struct btrfs_trans_handle *trans;
	u64 lockstart;
	u64 lockend;
	u64 tail_start;
	u64 tail_len;
	u64 orig_start = offset;
	u64 cur_offset;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
	u64 drop_end;
	int ret = 0;
	int err = 0;
	unsigned int rsv_count;
	bool same_block;
	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
	u64 ino_size;
	/* set once any partial block has been zeroed */
	bool truncated_block = false;
	/* set once the inode item was updated inside a transaction */
	bool updated_inode = false;

	ret = btrfs_wait_ordered_range(inode, offset, len);
	if (ret)
		return ret;

	inode_lock(inode);
	ino_size = round_up(inode->i_size, root->sectorsize);
	/* skip a hole that already exists at the start of the range */
	ret = find_first_non_hole(inode, &offset, &len);
	if (ret < 0)
		goto out_only_mutex;
	if (ret && !len) {
		/* the whole range already lies inside a hole */
		ret = 0;
		goto out_only_mutex;
	}

	/* sector-aligned part of the range that gets locked and dropped */
	lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
	lockend = round_down(offset + len,
			     BTRFS_I(inode)->root->sectorsize) - 1;
	same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
		== (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
	/*
	 * The range lies within one block and does not cover it completely:
	 * zeroing that part of the block is all there is to do.  Blocks
	 * beyond the rounded-up i_size are not touched.
	 */
	if (same_block && len < root->sectorsize) {
		if (offset < ino_size) {
			truncated_block = true;
			ret = btrfs_truncate_block(inode, offset, len, 0);
		} else {
			ret = 0;
		}
		goto out_only_mutex;
	}

	/* zero the tail of the first block, from @offset on */
	if (offset < ino_size) {
		truncated_block = true;
		ret = btrfs_truncate_block(inode, offset, 0, 0);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	/*
	 * When the first lookup did not move @offset, look for a hole again,
	 * this time starting at the aligned lockstart.
	 */
	if (offset == orig_start) {
		/* restart the search at the first aligned byte */
		len = offset + len - lockstart;
		offset = lockstart;
		ret = find_first_non_hole(inode, &offset, &len);
		if (ret < 0)
			goto out_only_mutex;
		if (ret && !len) {
			ret = 0;
			goto out_only_mutex;
		}
		lockstart = offset;
	}

	/* handle the unaligned tail unless it already lies in a hole */
	tail_start = lockend + 1;
	tail_len = offset + len - tail_start;
	if (tail_len) {
		ret = find_first_non_hole(inode, &tail_start, &tail_len);
		if (unlikely(ret < 0))
			goto out_only_mutex;
		if (!ret) {
			/* zero the front part of the last block */
			if (tail_start + tail_len < ino_size) {
				truncated_block = true;
				ret = btrfs_truncate_block(inode,
							tail_start + tail_len,
							0, 1);
				if (ret)
					goto out_only_mutex;
			}
		}
	}

	/* no whole sector left to drop */
	if (lockend < lockstart) {
		ret = 0;
		goto out_only_mutex;
	}

	while (1) {
		struct btrfs_ordered_extent *ordered;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * Proceed only when no ordered extent overlaps the range and
		 * no page exists in it; otherwise unlock, wait for the
		 * ordered extents and try again.
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state, GFP_NOFS);
		ret = btrfs_wait_ordered_range(inode, lockstart,
					       lockend - lockstart + 1);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv) {
		ret = -ENOMEM;
		goto out_free;
	}
	rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
	rsv->failfast = 1;

	/*
	 * Reserve one unit less when NO_HOLES is set.
	 * NOTE(review): presumably the units are for the inode update, the
	 * extent removal and, without NO_HOLES, the hole item - confirm.
	 */
	rsv_count = no_holes ? 2 : 3;
	trans = btrfs_start_transaction(root, rsv_count);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
				      min_size, 0);
	BUG_ON(ret);
	trans->block_rsv = rsv;

	cur_offset = lockstart;
	len = lockend - cur_offset;
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1, 0, 0, NULL);
		/* only -ENOSPC is retried with a fresh transaction */
		if (ret != -ENOSPC)
			break;

		trans->block_rsv = &root->fs_info->trans_block_rsv;

		/* record what was dropped so far as a hole */
		if (cur_offset < ino_size) {
			ret = fill_holes(trans, inode, path, cur_offset,
					 drop_end);
			if (ret) {
				err = ret;
				break;
			}
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);

		trans = btrfs_start_transaction(root, rsv_count);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size, 0);
		BUG_ON(ret);
		trans->block_rsv = rsv;

		/* skip any hole that follows the part dropped so far */
		ret = find_first_non_hole(inode, &cur_offset, &len);
		if (unlikely(ret < 0))
			break;
		if (ret && !len) {
			ret = 0;
			break;
		}
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	/*
	 * Make sure the final hole reaches the end of the locked range even
	 * when __btrfs_drop_extents() reported an earlier end.
	 * NOTE(review): presumably needed with NO_HOLES, where an existing
	 * hole has no item to be dropped - confirm.
	 */
	if (drop_end <= lockend)
		drop_end = lockend + 1;
	/*
	 * Do not record a hole for a range at or beyond the rounded-up
	 * i_size, nor for an empty range.
	 */
	if (cur_offset < ino_size && cur_offset < drop_end) {
		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
		if (ret) {
			err = ret;
			goto out_trans;
		}
	}

out_trans:
	/* trans is NULL when restarting the transaction failed */
	if (!trans)
		goto out_free;

	inode_inc_iversion(inode);
	inode->i_mtime = inode->i_ctime = current_time(inode);

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	updated_inode = true;
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(root, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
out_only_mutex:
	if (!updated_inode && truncated_block && !ret && !err) {
		/*
		 * Only partial blocks were zeroed and no transaction updated
		 * the inode item yet: update it now in a transaction of its
		 * own.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
		} else {
			err = btrfs_update_inode(trans, root, inode);
			ret = btrfs_end_transaction(trans, root);
		}
	}
	inode_unlock(inode);
	/* err takes precedence over ret */
	if (ret && !err)
		err = ret;
	return err;
}
2641
2642
/* One contiguous byte range collected by add_falloc_range() */
struct falloc_range {
	/* link in the list the range was added to */
	struct list_head list;
	/* first byte of the range */
	u64 start;
	/* length of the range in bytes */
	u64 len;
};
2648
2649
2650
2651
2652
2653
2654
2655static int add_falloc_range(struct list_head *head, u64 start, u64 len)
2656{
2657 struct falloc_range *prev = NULL;
2658 struct falloc_range *range = NULL;
2659
2660 if (list_empty(head))
2661 goto insert;
2662
2663
2664
2665
2666
2667 prev = list_entry(head->prev, struct falloc_range, list);
2668 if (prev->start + prev->len == start) {
2669 prev->len += len;
2670 return 0;
2671 }
2672insert:
2673 range = kmalloc(sizeof(*range), GFP_KERNEL);
2674 if (!range)
2675 return -ENOMEM;
2676 range->start = start;
2677 range->len = len;
2678 list_add_tail(&range->list, head);
2679 return 0;
2680}
2681
2682static long btrfs_fallocate(struct file *file, int mode,
2683 loff_t offset, loff_t len)
2684{
2685 struct inode *inode = file_inode(file);
2686 struct extent_state *cached_state = NULL;
2687 struct falloc_range *range;
2688 struct falloc_range *tmp;
2689 struct list_head reserve_list;
2690 u64 cur_offset;
2691 u64 last_byte;
2692 u64 alloc_start;
2693 u64 alloc_end;
2694 u64 alloc_hint = 0;
2695 u64 locked_end;
2696 u64 actual_end = 0;
2697 struct extent_map *em;
2698 int blocksize = BTRFS_I(inode)->root->sectorsize;
2699 int ret;
2700
2701 alloc_start = round_down(offset, blocksize);
2702 alloc_end = round_up(offset + len, blocksize);
2703 cur_offset = alloc_start;
2704
2705
2706 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
2707 return -EOPNOTSUPP;
2708
2709 if (mode & FALLOC_FL_PUNCH_HOLE)
2710 return btrfs_punch_hole(inode, offset, len);
2711
2712
2713
2714
2715
2716
2717 ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start);
2718 if (ret < 0)
2719 return ret;
2720
2721 inode_lock(inode);
2722
2723 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
2724 ret = inode_newsize_ok(inode, offset + len);
2725 if (ret)
2726 goto out;
2727 }
2728
2729
2730
2731
2732
2733
2734
2735
2736 if (alloc_start > inode->i_size) {
2737 ret = btrfs_cont_expand(inode, i_size_read(inode),
2738 alloc_start);
2739 if (ret)
2740 goto out;
2741 } else if (offset + len > inode->i_size) {
2742
2743
2744
2745
2746
2747 ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
2748 if (ret)
2749 goto out;
2750 }
2751
2752
2753
2754
2755
2756 ret = btrfs_wait_ordered_range(inode, alloc_start,
2757 alloc_end - alloc_start);
2758 if (ret)
2759 goto out;
2760
2761 locked_end = alloc_end - 1;
2762 while (1) {
2763 struct btrfs_ordered_extent *ordered;
2764
2765
2766
2767
2768 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
2769 locked_end, &cached_state);
2770 ordered = btrfs_lookup_first_ordered_extent(inode,
2771 alloc_end - 1);
2772 if (ordered &&
2773 ordered->file_offset + ordered->len > alloc_start &&
2774 ordered->file_offset < alloc_end) {
2775 btrfs_put_ordered_extent(ordered);
2776 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
2777 alloc_start, locked_end,
2778 &cached_state, GFP_KERNEL);
2779
2780
2781
2782
2783 ret = btrfs_wait_ordered_range(inode, alloc_start,
2784 alloc_end - alloc_start);
2785 if (ret)
2786 goto out;
2787 } else {
2788 if (ordered)
2789 btrfs_put_ordered_extent(ordered);
2790 break;
2791 }
2792 }
2793
2794
2795 INIT_LIST_HEAD(&reserve_list);
2796 while (1) {
2797 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
2798 alloc_end - cur_offset, 0);
2799 if (IS_ERR_OR_NULL(em)) {
2800 if (!em)
2801 ret = -ENOMEM;
2802 else
2803 ret = PTR_ERR(em);
2804 break;
2805 }
2806 last_byte = min(extent_map_end(em), alloc_end);
2807 actual_end = min_t(u64, extent_map_end(em), offset + len);
2808 last_byte = ALIGN(last_byte, blocksize);
2809 if (em->block_start == EXTENT_MAP_HOLE ||
2810 (cur_offset >= inode->i_size &&
2811 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
2812 ret = add_falloc_range(&reserve_list, cur_offset,
2813 last_byte - cur_offset);
2814 if (ret < 0) {
2815 free_extent_map(em);
2816 break;
2817 }
2818 ret = btrfs_qgroup_reserve_data(inode, cur_offset,
2819 last_byte - cur_offset);
2820 if (ret < 0)
2821 break;
2822 } else {
2823
2824
2825
2826
2827
2828 btrfs_free_reserved_data_space(inode, cur_offset,
2829 last_byte - cur_offset);
2830 }
2831 free_extent_map(em);
2832 cur_offset = last_byte;
2833 if (cur_offset >= alloc_end)
2834 break;
2835 }
2836
2837
2838
2839
2840
2841 list_for_each_entry_safe(range, tmp, &reserve_list, list) {
2842 if (!ret)
2843 ret = btrfs_prealloc_file_range(inode, mode,
2844 range->start,
2845 range->len, 1 << inode->i_blkbits,
2846 offset + len, &alloc_hint);
2847 else
2848 btrfs_free_reserved_data_space(inode, range->start,
2849 range->len);
2850 list_del(&range->list);
2851 kfree(range);
2852 }
2853 if (ret < 0)
2854 goto out_unlock;
2855
2856 if (actual_end > inode->i_size &&
2857 !(mode & FALLOC_FL_KEEP_SIZE)) {
2858 struct btrfs_trans_handle *trans;
2859 struct btrfs_root *root = BTRFS_I(inode)->root;
2860
2861
2862
2863
2864
2865
2866 trans = btrfs_start_transaction(root, 1);
2867 if (IS_ERR(trans)) {
2868 ret = PTR_ERR(trans);
2869 } else {
2870 inode->i_ctime = current_time(inode);
2871 i_size_write(inode, actual_end);
2872 btrfs_ordered_update_i_size(inode, actual_end, NULL);
2873 ret = btrfs_update_inode(trans, root, inode);
2874 if (ret)
2875 btrfs_end_transaction(trans, root);
2876 else
2877 ret = btrfs_end_transaction(trans, root);
2878 }
2879 }
2880out_unlock:
2881 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
2882 &cached_state, GFP_KERNEL);
2883out:
2884 inode_unlock(inode);
2885
2886 if (ret != 0)
2887 btrfs_free_reserved_data_space(inode, alloc_start,
2888 alloc_end - cur_offset);
2889 return ret;
2890}
2891
2892static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2893{
2894 struct btrfs_root *root = BTRFS_I(inode)->root;
2895 struct extent_map *em = NULL;
2896 struct extent_state *cached_state = NULL;
2897 u64 lockstart;
2898 u64 lockend;
2899 u64 start;
2900 u64 len;
2901 int ret = 0;
2902
2903 if (inode->i_size == 0)
2904 return -ENXIO;
2905
2906
2907
2908
2909
2910 start = max_t(loff_t, 0, *offset);
2911
2912 lockstart = round_down(start, root->sectorsize);
2913 lockend = round_up(i_size_read(inode), root->sectorsize);
2914 if (lockend <= lockstart)
2915 lockend = lockstart + root->sectorsize;
2916 lockend--;
2917 len = lockend - lockstart + 1;
2918
2919 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2920 &cached_state);
2921
2922 while (start < inode->i_size) {
2923 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
2924 if (IS_ERR(em)) {
2925 ret = PTR_ERR(em);
2926 em = NULL;
2927 break;
2928 }
2929
2930 if (whence == SEEK_HOLE &&
2931 (em->block_start == EXTENT_MAP_HOLE ||
2932 test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
2933 break;
2934 else if (whence == SEEK_DATA &&
2935 (em->block_start != EXTENT_MAP_HOLE &&
2936 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
2937 break;
2938
2939 start = em->start + em->len;
2940 free_extent_map(em);
2941 em = NULL;
2942 cond_resched();
2943 }
2944 free_extent_map(em);
2945 if (!ret) {
2946 if (whence == SEEK_DATA && start >= inode->i_size)
2947 ret = -ENXIO;
2948 else
2949 *offset = min_t(loff_t, start, inode->i_size);
2950 }
2951 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2952 &cached_state, GFP_NOFS);
2953 return ret;
2954}
2955
2956static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
2957{
2958 struct inode *inode = file->f_mapping->host;
2959 int ret;
2960
2961 inode_lock(inode);
2962 switch (whence) {
2963 case SEEK_END:
2964 case SEEK_CUR:
2965 offset = generic_file_llseek(file, offset, whence);
2966 goto out;
2967 case SEEK_DATA:
2968 case SEEK_HOLE:
2969 if (offset >= i_size_read(inode)) {
2970 inode_unlock(inode);
2971 return -ENXIO;
2972 }
2973
2974 ret = find_desired_extent(inode, &offset, whence);
2975 if (ret) {
2976 inode_unlock(inode);
2977 return ret;
2978 }
2979 }
2980
2981 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
2982out:
2983 inode_unlock(inode);
2984 return offset;
2985}
2986
2987const struct file_operations btrfs_file_operations = {
2988 .llseek = btrfs_file_llseek,
2989 .read_iter = generic_file_read_iter,
2990 .splice_read = generic_file_splice_read,
2991 .write_iter = btrfs_file_write_iter,
2992 .mmap = btrfs_file_mmap,
2993 .open = generic_file_open,
2994 .release = btrfs_release_file,
2995 .fsync = btrfs_sync_file,
2996 .fallocate = btrfs_fallocate,
2997 .unlocked_ioctl = btrfs_ioctl,
2998#ifdef CONFIG_COMPAT
2999 .compat_ioctl = btrfs_compat_ioctl,
3000#endif
3001 .copy_file_range = btrfs_copy_file_range,
3002 .clone_file_range = btrfs_clone_file_range,
3003 .dedupe_file_range = btrfs_dedupe_file_range,
3004};
3005
3006void btrfs_auto_defrag_exit(void)
3007{
3008 kmem_cache_destroy(btrfs_inode_defrag_cachep);
3009}
3010
3011int btrfs_auto_defrag_init(void)
3012{
3013 btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
3014 sizeof(struct inode_defrag), 0,
3015 SLAB_MEM_SPREAD,
3016 NULL);
3017 if (!btrfs_inode_defrag_cachep)
3018 return -ENOMEM;
3019
3020 return 0;
3021}
3022
3023int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
3024{
3025 int ret;
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
3042 if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
3043 &BTRFS_I(inode)->runtime_flags))
3044 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
3045
3046 return ret;
3047}
3048