/* linux/fs/btrfs/send.c */
   1/*
   2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/bsearch.h>
  20#include <linux/fs.h>
  21#include <linux/file.h>
  22#include <linux/sort.h>
  23#include <linux/mount.h>
  24#include <linux/xattr.h>
  25#include <linux/posix_acl_xattr.h>
  26#include <linux/radix-tree.h>
  27#include <linux/vmalloc.h>
  28#include <linux/string.h>
  29
  30#include "send.h"
  31#include "backref.h"
  32#include "hash.h"
  33#include "locking.h"
  34#include "disk-io.h"
  35#include "btrfs_inode.h"
  36#include "transaction.h"
  37#include "compression.h"
  38
  39static int g_verbose = 0;
  40
  41#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
  42
/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;	/* first byte of the current path */
			char *end;	/* points at the terminating NUL */

			char *buf;	/* backing storage (inline_buf or heap) */
			unsigned short buf_len:15;	/* capacity of buf in bytes */
			unsigned short reversed:1;	/* build right-to-left? */
			char inline_buf[];	/* small paths live here, no kmalloc */
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
/* Bytes of inline storage available before a heap buffer is needed. */
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
  71
  72
/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;	/* root considered as a clone source */
	u64 ino;			/* inode in @root referencing the extent */
	u64 offset;			/* file offset of that reference */

	u64 found_refs;			/* number of references found so far */
};
  81
  82#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
  83#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
  84
/* State of a single send operation, from ioctl entry to completion. */
struct send_ctx {
	/* Destination file of the serialized stream and current write offset. */
	struct file *send_filp;
	loff_t send_off;
	/* Buffer the current command is assembled in (see begin_cmd/send_cmd). */
	char *send_buf;
	u32 send_size;		/* bytes of send_buf currently used */
	u32 send_max_size;	/* capacity of send_buf */
	/* Accumulated sizes, total and per command type (see send_cmd()). */
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	/* Snapshot being sent and, for incremental sends, its parent. */
	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	/* Candidate clone sources, see struct clone_root. */
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * infos of the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;

	/* Inode number the send has progressed to. */
	u64 send_progress;

	/* Reference lists for the inode currently being processed. */
	struct list_head new_refs;
	struct list_head deleted_refs;

	/* Cache of inode -> name resolutions, see struct name_cache_entry. */
	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;	/* readahead state for file data reads */

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that lead to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that lead to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};
 227
/*
 * A delayed directory move/rename (see the long comment on
 * pending_dir_moves in struct send_ctx).
 */
struct pending_dir_move {
	struct rb_node node;		/* linkage in sctx->pending_dir_moves */
	struct list_head list;		/* moves sharing the same parent_ino */
	u64 parent_ino;			/* inode number of the new parent dir */
	u64 ino;			/* directory inode to move/rename */
	u64 gen;			/* generation of @ino */
	struct list_head update_refs;	/* refs to process once the move runs */
};
 236
/*
 * Entry in sctx->waiting_dir_moves: directory @ino cannot be moved/renamed
 * until its immediate parent has been processed first.
 */
struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;	/* was @ino given an orphanized name? */
};
 248
/*
 * Entry in sctx->orphan_dirs: a directory whose removal must wait for the
 * move/rename of a child (see the comment on orphan_dirs in struct
 * send_ctx).  Indexed by the inode number of the directory to delete.
 */
struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
};
 254
/*
 * One entry of the name cache (sctx->name_cache): the cached result of a
 * previous inode -> name resolution.
 */
struct name_cache_entry {
	struct list_head list;	/* linkage in sctx->name_cache_list */
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;		/* inode this name belongs to */
	u64 gen;		/* generation of @ino */
	u64 parent_ino;		/* directory containing the name */
	u64 parent_gen;		/* generation of @parent_ino */
	int ret;		/* result code of the cached resolution */
	int need_later_update;
	int name_len;		/* length of name[], not NUL terminated */
	char name[];		/* the cached name itself */
};
 275
 276static void inconsistent_snapshot_error(struct send_ctx *sctx,
 277                                        enum btrfs_compare_tree_result result,
 278                                        const char *what)
 279{
 280        const char *result_string;
 281
 282        switch (result) {
 283        case BTRFS_COMPARE_TREE_NEW:
 284                result_string = "new";
 285                break;
 286        case BTRFS_COMPARE_TREE_DELETED:
 287                result_string = "deleted";
 288                break;
 289        case BTRFS_COMPARE_TREE_CHANGED:
 290                result_string = "updated";
 291                break;
 292        case BTRFS_COMPARE_TREE_SAME:
 293                ASSERT(0);
 294                result_string = "unchanged";
 295                break;
 296        default:
 297                ASSERT(0);
 298                result_string = "unexpected";
 299        }
 300
 301        btrfs_err(sctx->send_root->fs_info,
 302                  "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
 303                  result_string, what, sctx->cmp_key->objectid,
 304                  sctx->send_root->root_key.objectid,
 305                  (sctx->parent_root ?
 306                   sctx->parent_root->root_key.objectid : 0));
 307}
 308
 309static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 310
 311static struct waiting_dir_move *
 312get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
 313
 314static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
 315
 316static int need_send_hole(struct send_ctx *sctx)
 317{
 318        return (sctx->parent_root && !sctx->cur_inode_new &&
 319                !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
 320                S_ISREG(sctx->cur_inode_mode));
 321}
 322
 323static void fs_path_reset(struct fs_path *p)
 324{
 325        if (p->reversed) {
 326                p->start = p->buf + p->buf_len - 1;
 327                p->end = p->start;
 328                *p->start = 0;
 329        } else {
 330                p->start = p->buf;
 331                p->end = p->start;
 332                *p->start = 0;
 333        }
 334}
 335
/*
 * Allocate a new, empty fs_path.  The path starts out using the inline
 * buffer embedded in the struct; fs_path_ensure_buf() switches to a heap
 * buffer when more room is needed.  Returns NULL on allocation failure.
 */
static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}
 349
 350static struct fs_path *fs_path_alloc_reversed(void)
 351{
 352        struct fs_path *p;
 353
 354        p = fs_path_alloc();
 355        if (!p)
 356                return NULL;
 357        p->reversed = 1;
 358        fs_path_reset(p);
 359        return p;
 360}
 361
 362static void fs_path_free(struct fs_path *p)
 363{
 364        if (!p)
 365                return;
 366        if (p->buf != p->inline_buf)
 367                kfree(p->buf);
 368        kfree(p);
 369}
 370
/* Length of the path stored in @p, excluding the terminating NUL. */
static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}
 375
/*
 * Make sure @p's buffer can hold a path of @len bytes plus the terminating
 * NUL, growing it if necessary.  Existing content is preserved: normal
 * paths stay anchored at the buffer start, reversed paths are re-anchored
 * at the end of the grown buffer.  Returns 0 on success or -ENOMEM (also
 * used, with a WARN, when the request would exceed PATH_MAX).
 */
static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;	/* room for the terminating NUL */

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf) {
		/* inline_buf cannot be krealloc'ed; copy it out by hand */
		tmp_buf = kmalloc(len, GFP_KERNEL);
		if (tmp_buf)
			memcpy(tmp_buf, p->buf, old_buf_len);
	} else {
		tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
	}
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);

	if (p->reversed) {
		/* move the content (incl. NUL) to the end of the new buffer */
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}
 425
/*
 * Reserve room in @p for a new component of @name_len bytes, inserting a
 * '/' separator when the path is not empty.  On success *prepared points
 * at the position the caller must copy the name to: space is made on the
 * left for reversed paths and on the right (NUL terminated) for normal
 * paths.  Returns 0 on success or a negative errno from
 * fs_path_ensure_buf().
 */
static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;	/* plus one byte for the '/' separator */
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}
 455
 456static int fs_path_add(struct fs_path *p, const char *name, int name_len)
 457{
 458        int ret;
 459        char *prepared;
 460
 461        ret = fs_path_prepare_for_add(p, name_len, &prepared);
 462        if (ret < 0)
 463                goto out;
 464        memcpy(prepared, name, name_len);
 465
 466out:
 467        return ret;
 468}
 469
 470static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
 471{
 472        int ret;
 473        char *prepared;
 474
 475        ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
 476        if (ret < 0)
 477                goto out;
 478        memcpy(prepared, p2->start, p2->end - p2->start);
 479
 480out:
 481        return ret;
 482}
 483
 484static int fs_path_add_from_extent_buffer(struct fs_path *p,
 485                                          struct extent_buffer *eb,
 486                                          unsigned long off, int len)
 487{
 488        int ret;
 489        char *prepared;
 490
 491        ret = fs_path_prepare_for_add(p, len, &prepared);
 492        if (ret < 0)
 493                goto out;
 494
 495        read_extent_buffer(eb, prepared, off, len);
 496
 497out:
 498        return ret;
 499}
 500
 501static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 502{
 503        int ret;
 504
 505        p->reversed = from->reversed;
 506        fs_path_reset(p);
 507
 508        ret = fs_path_add_path(p, from);
 509
 510        return ret;
 511}
 512
 513
/*
 * Turn a reversed path into a normal one by moving its content (including
 * the terminating NUL) to the front of the buffer.  memmove is required
 * because source and destination may overlap.  No-op when the path is
 * already unreversed.
 */
static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}
 529
 530static struct btrfs_path *alloc_path_for_send(void)
 531{
 532        struct btrfs_path *path;
 533
 534        path = btrfs_alloc_path();
 535        if (!path)
 536                return NULL;
 537        path->search_commit_root = 1;
 538        path->skip_locking = 1;
 539        path->need_commit_sem = 1;
 540        return path;
 541}
 542
/*
 * Write @len bytes from kernel buffer @buf to @filp at *@off, looping until
 * everything is written (*off is advanced by vfs_write()).  The temporary
 * set_fs(KERNEL_DS) is needed because vfs_write() expects a user space
 * pointer.  Returns 0 on success, a negative errno from vfs_write(), or
 * -EIO when the file makes no progress.
 */
static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	mm_segment_t old_fs;
	u32 pos = 0;

	old_fs = get_fs();
	set_fs(KERNEL_DS);

	while (pos < len) {
		ret = vfs_write(filp, (__force const char __user *)buf + pos,
				len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			goto out;
		if (ret == 0) {
			/* no progress was made; avoid spinning forever */
			ret = -EIO;
			goto out;
		}
		pos += ret;
	}

	ret = 0;

out:
	/* always restore the previous address limit */
	set_fs(old_fs);
	return ret;
}
 574
/*
 * Append one TLV (type/length/value) attribute to the command currently
 * being assembled in the send buffer.  Returns -EOVERFLOW when header plus
 * payload do not fit into the remaining buffer space.
 */
static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	/* header first, payload immediately after it */
	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}
 592
/*
 * Generate a tlv_put_u<bits>() helper that stores an integer attribute in
 * little endian on-disk/stream format.  Only the 64 bit variant is
 * instantiated below.
 */
#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
			u##bits attr, u##bits value)			\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)
 602
 603static int tlv_put_string(struct send_ctx *sctx, u16 attr,
 604                          const char *str, int len)
 605{
 606        if (len == -1)
 607                len = strlen(str);
 608        return tlv_put(sctx, attr, str, len);
 609}
 610
/* Store a UUID attribute (always BTRFS_UUID_SIZE bytes). */
static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}
 616
/*
 * Store a btrfs_timespec attribute.  @ts is an offset into extent buffer
 * @eb, not a directly dereferenceable pointer, so the value is copied out
 * with read_extent_buffer() first.
 */
static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}
 625
 626
/*
 * Convenience wrappers around the tlv_put_*() helpers.  All of them expect
 * the calling function to declare a local "ret" variable and provide a
 * "tlv_put_failure" label, which is jumped to when the send buffer runs
 * out of space.
 */
#define TLV_PUT(sctx, attrtype, attrlen, data) \
	do { \
		ret = tlv_put(sctx, attrtype, attrlen, data); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
			p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while(0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
 670
/*
 * Write the stream header (magic string + version) at the start of the
 * send stream.  Written directly via write_buf(), bypassing the command
 * buffer used by begin_cmd()/send_cmd().
 */
static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
					&sctx->send_off);
}
 681
/*
 * For each command/item we want to send to userspace, we call this function.
 * It reserves space for the command header at the start of the send buffer
 * and stores the command id; the header's len and crc fields are filled in
 * later by send_cmd().  The buffer must be empty (no command in flight).
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	/* a previous command must have been completed via send_cmd() */
	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}
 700
/*
 * Finish and emit the command assembled in the send buffer: fill in the
 * header length (payload only, header excluded), compute the crc32c over
 * the whole command with the crc field zeroed, write it out and update the
 * per-command and total size statistics.  Resets send_size so a new
 * command can be started with begin_cmd().
 */
static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	/* crc is computed with this field zeroed, then stored */
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
					&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}
 723
/*
 * Sends a move instruction to user space
 * (RENAME command with source and destination paths; the TLV_PUT_PATH
 * macros jump to tlv_put_failure on buffer overflow).
 */
static int send_rename(struct send_ctx *sctx,
		     struct fs_path *from, struct fs_path *to)
{
	int ret;

verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
 747
/*
 * Sends a link instruction to user space
 * (create hard link @path pointing to existing file @lnk).
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	int ret;

verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
 771
/*
 * Sends an unlink instruction to user space
 * (remove the file at @path).
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

verbose_printk("btrfs: send_unlink %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
 793
/*
 * Sends a rmdir instruction to user space
 * (remove the directory at @path).
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

verbose_printk("btrfs: send_rmdir %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
 815
/*
 * Helper function to retrieve some fields from an inode item.
 * Looks up the BTRFS_INODE_ITEM_KEY of @ino in @root using the caller
 * supplied @path.  Any of the output pointers may be NULL when the caller
 * is not interested in that field.  On success @path points at the found
 * inode item and 0 is returned; -ENOENT when the inode item does not
 * exist, or a negative errno from the search.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			  u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			  u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		/* ret > 0 means the exact key was not found */
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}
 854
 855static int get_inode_info(struct btrfs_root *root,
 856                          u64 ino, u64 *size, u64 *gen,
 857                          u64 *mode, u64 *uid, u64 *gid,
 858                          u64 *rdev)
 859{
 860        struct btrfs_path *path;
 861        int ret;
 862
 863        path = alloc_path_for_send();
 864        if (!path)
 865                return -ENOMEM;
 866        ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
 867                               rdev);
 868        btrfs_free_path(path);
 869        return ret;
 870}
 871
 872typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
 873                                   struct fs_path *p,
 874                                   void *ctx);
 875
 876/*
 877 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 878 * btrfs_inode_extref.
 879 * The iterate callback may return a non zero value to stop iteration. This can
 880 * be a negative value for error codes or 1 to simply stop it.
 881 *
 882 * path must point to the INODE_REF or INODE_EXTREF when called.
 883 */
 884static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
 885                             struct btrfs_key *found_key, int resolve,
 886                             iterate_inode_ref_t iterate, void *ctx)
 887{
 888        struct extent_buffer *eb = path->nodes[0];
 889        struct btrfs_item *item;
 890        struct btrfs_inode_ref *iref;
 891        struct btrfs_inode_extref *extref;
 892        struct btrfs_path *tmp_path;
 893        struct fs_path *p;
 894        u32 cur = 0;
 895        u32 total;
 896        int slot = path->slots[0];
 897        u32 name_len;
 898        char *start;
 899        int ret = 0;
 900        int num = 0;
 901        int index;
 902        u64 dir;
 903        unsigned long name_off;
 904        unsigned long elem_size;
 905        unsigned long ptr;
 906
 907        p = fs_path_alloc_reversed();
 908        if (!p)
 909                return -ENOMEM;
 910
 911        tmp_path = alloc_path_for_send();
 912        if (!tmp_path) {
 913                fs_path_free(p);
 914                return -ENOMEM;
 915        }
 916
 917
 918        if (found_key->type == BTRFS_INODE_REF_KEY) {
 919                ptr = (unsigned long)btrfs_item_ptr(eb, slot,
 920                                                    struct btrfs_inode_ref);
 921                item = btrfs_item_nr(slot);
 922                total = btrfs_item_size(eb, item);
 923                elem_size = sizeof(*iref);
 924        } else {
 925                ptr = btrfs_item_ptr_offset(eb, slot);
 926                total = btrfs_item_size_nr(eb, slot);
 927                elem_size = sizeof(*extref);
 928        }
 929
 930        while (cur < total) {
 931                fs_path_reset(p);
 932
 933                if (found_key->type == BTRFS_INODE_REF_KEY) {
 934                        iref = (struct btrfs_inode_ref *)(ptr + cur);
 935                        name_len = btrfs_inode_ref_name_len(eb, iref);
 936                        name_off = (unsigned long)(iref + 1);
 937                        index = btrfs_inode_ref_index(eb, iref);
 938                        dir = found_key->offset;
 939                } else {
 940                        extref = (struct btrfs_inode_extref *)(ptr + cur);
 941                        name_len = btrfs_inode_extref_name_len(eb, extref);
 942                        name_off = (unsigned long)&extref->name;
 943                        index = btrfs_inode_extref_index(eb, extref);
 944                        dir = btrfs_inode_extref_parent(eb, extref);
 945                }
 946
 947                if (resolve) {
 948                        start = btrfs_ref_to_path(root, tmp_path, name_len,
 949                                                  name_off, eb, dir,
 950                                                  p->buf, p->buf_len);
 951                        if (IS_ERR(start)) {
 952                                ret = PTR_ERR(start);
 953                                goto out;
 954                        }
 955                        if (start < p->buf) {
 956                                /* overflow , try again with larger buffer */
 957                                ret = fs_path_ensure_buf(p,
 958                                                p->buf_len + p->buf - start);
 959                                if (ret < 0)
 960                                        goto out;
 961                                start = btrfs_ref_to_path(root, tmp_path,
 962                                                          name_len, name_off,
 963                                                          eb, dir,
 964                                                          p->buf, p->buf_len);
 965                                if (IS_ERR(start)) {
 966                                        ret = PTR_ERR(start);
 967                                        goto out;
 968                                }
 969                                BUG_ON(start < p->buf);
 970                        }
 971                        p->start = start;
 972                } else {
 973                        ret = fs_path_add_from_extent_buffer(p, eb, name_off,
 974                                                             name_len);
 975                        if (ret < 0)
 976                                goto out;
 977                }
 978
 979                cur += elem_size + name_len;
 980                ret = iterate(num, dir, index, p, ctx);
 981                if (ret)
 982                        goto out;
 983                num++;
 984        }
 985
 986out:
 987        btrfs_free_path(tmp_path);
 988        fs_path_free(p);
 989        return ret;
 990}
 991
/*
 * Callback invoked by iterate_dir_item() for each dir entry in a dir item.
 * "name" and "data" point into a buffer owned by the iterator and are only
 * valid for the duration of the call. Returning a negative value aborts
 * iteration with that error; returning a positive value stops it early.
 */
typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
                                  const char *name, int name_len,
                                  const char *data, int data_len,
                                  u8 type, void *ctx);
 996
 997/*
 998 * Helper function to iterate the entries in ONE btrfs_dir_item.
 999 * The iterate callback may return a non zero value to stop iteration. This can
1000 * be a negative value for error codes or 1 to simply stop it.
1001 *
1002 * path must point to the dir item when called.
1003 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
                            struct btrfs_key *found_key,
                            iterate_dir_item_t iterate, void *ctx)
{
        int ret = 0;
        struct extent_buffer *eb;
        struct btrfs_item *item;
        struct btrfs_dir_item *di;
        struct btrfs_key di_key;
        char *buf = NULL;          /* shared copy buffer for name + data */
        int buf_len;
        u32 name_len;
        u32 data_len;
        u32 cur;                   /* byte offset of the current entry */
        u32 len;
        u32 total;                 /* total size of the leaf item */
        int slot;
        int num;                   /* index of the entry, passed to iterate */
        u8 type;

        /*
         * Start with a small buffer (1 page). If later we end up needing more
         * space, which can happen for xattrs on a fs with a leaf size greater
         * than the page size, attempt to increase the buffer. Typically xattr
         * values are small.
         */
        buf_len = PATH_MAX;
        buf = kmalloc(buf_len, GFP_KERNEL);
        if (!buf) {
                ret = -ENOMEM;
                goto out;
        }

        eb = path->nodes[0];
        slot = path->slots[0];
        item = btrfs_item_nr(slot);
        di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
        cur = 0;
        len = 0;
        total = btrfs_item_size(eb, item);

        num = 0;
        /* One leaf item may pack several dir entries back to back. */
        while (cur < total) {
                name_len = btrfs_dir_name_len(eb, di);
                data_len = btrfs_dir_data_len(eb, di);
                type = btrfs_dir_type(eb, di);
                btrfs_dir_item_key_to_cpu(eb, di, &di_key);

                /* Validate the entry's sizes before copying it out. */
                if (type == BTRFS_FT_XATTR) {
                        if (name_len > XATTR_NAME_MAX) {
                                ret = -ENAMETOOLONG;
                                goto out;
                        }
                        if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
                                ret = -E2BIG;
                                goto out;
                        }
                } else {
                        /*
                         * Path too long
                         */
                        if (name_len + data_len > PATH_MAX) {
                                ret = -ENAMETOOLONG;
                                goto out;
                        }
                }

                if (name_len + data_len > buf_len) {
                        buf_len = name_len + data_len;
                        if (is_vmalloc_addr(buf)) {
                                /* vmalloc'ed buffers can't be krealloc'ed. */
                                vfree(buf);
                                buf = NULL;
                        } else {
                                char *tmp = krealloc(buf, buf_len,
                                                GFP_KERNEL | __GFP_NOWARN);

                                if (!tmp)
                                        kfree(buf);
                                buf = tmp;
                        }
                        /* Fall back to vmalloc when krealloc failed. */
                        if (!buf) {
                                buf = vmalloc(buf_len);
                                if (!buf) {
                                        ret = -ENOMEM;
                                        goto out;
                                }
                        }
                }

                /* Name is stored first, immediately followed by the data. */
                read_extent_buffer(eb, buf, (unsigned long)(di + 1),
                                name_len + data_len);

                len = sizeof(*di) + name_len + data_len;
                di = (struct btrfs_dir_item *)((char *)di + len);
                cur += len;

                ret = iterate(num, &di_key, buf, name_len, buf + name_len,
                                data_len, type, ctx);
                if (ret < 0)
                        goto out;
                /* A positive return requests an early, successful stop. */
                if (ret) {
                        ret = 0;
                        goto out;
                }

                num++;
        }

out:
        kvfree(buf);
        return ret;
}
1116
1117static int __copy_first_ref(int num, u64 dir, int index,
1118                            struct fs_path *p, void *ctx)
1119{
1120        int ret;
1121        struct fs_path *pt = ctx;
1122
1123        ret = fs_path_copy(pt, p);
1124        if (ret < 0)
1125                return ret;
1126
1127        /* we want the first only */
1128        return 1;
1129}
1130
1131/*
1132 * Retrieve the first path of an inode. If an inode has more then one
1133 * ref/hardlink, this is ignored.
1134 */
1135static int get_inode_path(struct btrfs_root *root,
1136                          u64 ino, struct fs_path *path)
1137{
1138        int ret;
1139        struct btrfs_key key, found_key;
1140        struct btrfs_path *p;
1141
1142        p = alloc_path_for_send();
1143        if (!p)
1144                return -ENOMEM;
1145
1146        fs_path_reset(path);
1147
1148        key.objectid = ino;
1149        key.type = BTRFS_INODE_REF_KEY;
1150        key.offset = 0;
1151
1152        ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1153        if (ret < 0)
1154                goto out;
1155        if (ret) {
1156                ret = 1;
1157                goto out;
1158        }
1159        btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1160        if (found_key.objectid != ino ||
1161            (found_key.type != BTRFS_INODE_REF_KEY &&
1162             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1163                ret = -ENOENT;
1164                goto out;
1165        }
1166
1167        ret = iterate_inode_ref(root, p, &found_key, 1,
1168                                __copy_first_ref, path);
1169        if (ret < 0)
1170                goto out;
1171        ret = 0;
1172
1173out:
1174        btrfs_free_path(p);
1175        return ret;
1176}
1177
/* Context handed to __iterate_backrefs() while resolving extent backrefs. */
struct backref_ctx {
        struct send_ctx *sctx;

        /* scratch path used by __iterate_backrefs for inode size lookups */
        struct btrfs_path *path;
        /* number of total found references */
        u64 found;

        /*
         * used for clones found in send_root. clones found behind cur_objectid
         * and cur_offset are not considered as allowed clones.
         */
        u64 cur_objectid;
        u64 cur_offset;

        /* may be truncated in case it's the last extent in a file */
        u64 extent_len;

        /* data offset in the file extent item */
        u64 data_offset;

        /* Just to check for bugs in backref resolving */
        int found_itself;
};
1201
1202static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1203{
1204        u64 root = (u64)(uintptr_t)key;
1205        struct clone_root *cr = (struct clone_root *)elt;
1206
1207        if (root < cr->root->objectid)
1208                return -1;
1209        if (root > cr->root->objectid)
1210                return 1;
1211        return 0;
1212}
1213
1214static int __clone_root_cmp_sort(const void *e1, const void *e2)
1215{
1216        struct clone_root *cr1 = (struct clone_root *)e1;
1217        struct clone_root *cr2 = (struct clone_root *)e2;
1218
1219        if (cr1->root->objectid < cr2->root->objectid)
1220                return -1;
1221        if (cr1->root->objectid > cr2->root->objectid)
1222                return 1;
1223        return 0;
1224}
1225
1226/*
1227 * Called for every backref that is found for the current extent.
1228 * Results are collected in sctx->clone_roots->ino/offset/found_refs
1229 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
        struct backref_ctx *bctx = ctx_;
        struct clone_root *found;
        int ret;
        u64 i_size;

        /* First check if the root is in the list of accepted clone sources */
        found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
                        bctx->sctx->clone_roots_cnt,
                        sizeof(struct clone_root),
                        __clone_root_cmp_bsearch);
        if (!found)
                return 0;

        /* Sanity flag: the extent must reference its own inode/offset too. */
        if (found->root == bctx->sctx->send_root &&
            ino == bctx->cur_objectid &&
            offset == bctx->cur_offset) {
                bctx->found_itself = 1;
        }

        /*
         * There are inodes that have extents that lie behind its i_size. Don't
         * accept clones from these extents.
         */
        ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
                               NULL, NULL, NULL);
        btrfs_release_path(bctx->path);
        if (ret < 0)
                return ret;

        if (offset + bctx->data_offset + bctx->extent_len > i_size)
                return 0;

        /*
         * Make sure we don't consider clones from send_root that are
         * behind the current inode/offset.
         */
        if (found->root == bctx->sctx->send_root) {
                /*
                 * TODO for the moment we don't accept clones from the inode
                 * that is currently send. We may change this when
                 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
                 * file.
                 */
                if (ino >= bctx->cur_objectid)
                        return 0;
#if 0
                if (ino > bctx->cur_objectid)
                        return 0;
                if (offset + bctx->extent_len > bctx->cur_offset)
                        return 0;
#endif
        }

        /* Accepted: record it, preferring the lowest inode number. */
        bctx->found++;
        found->found_refs++;
        if (ino < found->ino) {
                found->ino = ino;
                found->offset = offset;
        } else if (found->ino == ino) {
                /*
                 * same extent found more than once in the same file.
                 */
                if (found->offset > offset + bctx->extent_len)
                        found->offset = offset;
        }

        return 0;
}
1300
1301/*
1302 * Given an inode, offset and extent item, it finds a good clone for a clone
1303 * instruction. Returns -ENOENT when none could be found. The function makes
1304 * sure that the returned clone is usable at the point where sending is at the
1305 * moment. This means, that no clones are accepted which lie behind the current
1306 * inode+offset.
1307 *
1308 * path must point to the extent item when called.
1309 */
static int find_extent_clone(struct send_ctx *sctx,
                             struct btrfs_path *path,
                             u64 ino, u64 data_offset,
                             u64 ino_size,
                             struct clone_root **found)
{
        int ret;
        int extent_type;
        u64 logical;
        u64 disk_byte;
        u64 num_bytes;
        u64 extent_item_pos;
        u64 flags = 0;
        struct btrfs_file_extent_item *fi;
        struct extent_buffer *eb = path->nodes[0];
        struct backref_ctx *backref_ctx = NULL;
        struct clone_root *cur_clone_root;
        struct btrfs_key found_key;
        struct btrfs_path *tmp_path;
        int compressed;
        u32 i;

        tmp_path = alloc_path_for_send();
        if (!tmp_path)
                return -ENOMEM;

        /* We only use this path under the commit sem */
        tmp_path->need_commit_sem = 0;

        backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
        if (!backref_ctx) {
                ret = -ENOMEM;
                goto out;
        }

        backref_ctx->path = tmp_path;

        if (data_offset >= ino_size) {
                /*
                 * There may be extents that lie behind the file's size.
                 * I at least had this in combination with snapshotting while
                 * writing large files.
                 */
                ret = 0;
                goto out;
        }

        fi = btrfs_item_ptr(eb, path->slots[0],
                        struct btrfs_file_extent_item);
        extent_type = btrfs_file_extent_type(eb, fi);
        /* Inline extents and holes (disk_byte == 0) cannot be cloned. */
        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                ret = -ENOENT;
                goto out;
        }
        compressed = btrfs_file_extent_compression(eb, fi);

        num_bytes = btrfs_file_extent_num_bytes(eb, fi);
        disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
        if (disk_byte == 0) {
                ret = -ENOENT;
                goto out;
        }
        logical = disk_byte + btrfs_file_extent_offset(eb, fi);

        /* Look up the extent item, protected by the commit root semaphore. */
        down_read(&sctx->send_root->fs_info->commit_root_sem);
        ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
                                  &found_key, &flags);
        up_read(&sctx->send_root->fs_info->commit_root_sem);
        btrfs_release_path(tmp_path);

        if (ret < 0)
                goto out;
        /* A data extent must not resolve to a tree block. */
        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                ret = -EIO;
                goto out;
        }

        /*
         * Setup the clone roots.
         */
        for (i = 0; i < sctx->clone_roots_cnt; i++) {
                cur_clone_root = sctx->clone_roots + i;
                cur_clone_root->ino = (u64)-1;
                cur_clone_root->offset = 0;
                cur_clone_root->found_refs = 0;
        }

        backref_ctx->sctx = sctx;
        backref_ctx->found = 0;
        backref_ctx->cur_objectid = ino;
        backref_ctx->cur_offset = data_offset;
        backref_ctx->found_itself = 0;
        backref_ctx->extent_len = num_bytes;
        /*
         * For non-compressed extents iterate_extent_inodes() gives us extent
         * offsets that already take into account the data offset, but not for
         * compressed extents, since the offset is logical and not relative to
         * the physical extent locations. We must take this into account to
         * avoid sending clone offsets that go beyond the source file's size,
         * which would result in the clone ioctl failing with -EINVAL on the
         * receiving end.
         */
        if (compressed == BTRFS_COMPRESS_NONE)
                backref_ctx->data_offset = 0;
        else
                backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

        /*
         * The last extent of a file may be too large due to page alignment.
         * We need to adjust extent_len in this case so that the checks in
         * __iterate_backrefs work.
         */
        if (data_offset + num_bytes >= ino_size)
                backref_ctx->extent_len = ino_size - data_offset;

        /*
         * Now collect all backrefs.
         */
        if (compressed == BTRFS_COMPRESS_NONE)
                extent_item_pos = logical - found_key.objectid;
        else
                extent_item_pos = 0;
        ret = iterate_extent_inodes(sctx->send_root->fs_info,
                                        found_key.objectid, extent_item_pos, 1,
                                        __iterate_backrefs, backref_ctx);

        if (ret < 0)
                goto out;

        if (!backref_ctx->found_itself) {
                /* found a bug in backref code? */
                ret = -EIO;
                btrfs_err(sctx->send_root->fs_info, "did not find backref in "
                                "send_root. inode=%llu, offset=%llu, "
                                "disk_byte=%llu found extent=%llu",
                                ino, data_offset, disk_byte, found_key.objectid);
                goto out;
        }

verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
                "ino=%llu, "
                "num_bytes=%llu, logical=%llu\n",
                data_offset, ino, num_bytes, logical);

        if (!backref_ctx->found)
                verbose_printk("btrfs:    no clones found\n");

        /* Pick a clone source, preferring clones from the send root. */
        cur_clone_root = NULL;
        for (i = 0; i < sctx->clone_roots_cnt; i++) {
                if (sctx->clone_roots[i].found_refs) {
                        if (!cur_clone_root)
                                cur_clone_root = sctx->clone_roots + i;
                        else if (sctx->clone_roots[i].root == sctx->send_root)
                                /* prefer clones from send_root over others */
                                cur_clone_root = sctx->clone_roots + i;
                }

        }

        if (cur_clone_root) {
                *found = cur_clone_root;
                ret = 0;
        } else {
                ret = -ENOENT;
        }

out:
        btrfs_free_path(tmp_path);
        kfree(backref_ctx);
        return ret;
}
1481
1482static int read_symlink(struct btrfs_root *root,
1483                        u64 ino,
1484                        struct fs_path *dest)
1485{
1486        int ret;
1487        struct btrfs_path *path;
1488        struct btrfs_key key;
1489        struct btrfs_file_extent_item *ei;
1490        u8 type;
1491        u8 compression;
1492        unsigned long off;
1493        int len;
1494
1495        path = alloc_path_for_send();
1496        if (!path)
1497                return -ENOMEM;
1498
1499        key.objectid = ino;
1500        key.type = BTRFS_EXTENT_DATA_KEY;
1501        key.offset = 0;
1502        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1503        if (ret < 0)
1504                goto out;
1505        if (ret) {
1506                /*
1507                 * An empty symlink inode. Can happen in rare error paths when
1508                 * creating a symlink (transaction committed before the inode
1509                 * eviction handler removed the symlink inode items and a crash
1510                 * happened in between or the subvol was snapshoted in between).
1511                 * Print an informative message to dmesg/syslog so that the user
1512                 * can delete the symlink.
1513                 */
1514                btrfs_err(root->fs_info,
1515                          "Found empty symlink inode %llu at root %llu",
1516                          ino, root->root_key.objectid);
1517                ret = -EIO;
1518                goto out;
1519        }
1520
1521        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1522                        struct btrfs_file_extent_item);
1523        type = btrfs_file_extent_type(path->nodes[0], ei);
1524        compression = btrfs_file_extent_compression(path->nodes[0], ei);
1525        BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1526        BUG_ON(compression);
1527
1528        off = btrfs_file_extent_inline_start(ei);
1529        len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
1530
1531        ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1532
1533out:
1534        btrfs_free_path(path);
1535        return ret;
1536}
1537
1538/*
1539 * Helper function to generate a file name that is unique in the root of
1540 * send_root and parent_root. This is used to generate names for orphan inodes.
1541 */
1542static int gen_unique_name(struct send_ctx *sctx,
1543                           u64 ino, u64 gen,
1544                           struct fs_path *dest)
1545{
1546        int ret = 0;
1547        struct btrfs_path *path;
1548        struct btrfs_dir_item *di;
1549        char tmp[64];
1550        int len;
1551        u64 idx = 0;
1552
1553        path = alloc_path_for_send();
1554        if (!path)
1555                return -ENOMEM;
1556
1557        while (1) {
1558                len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
1559                                ino, gen, idx);
1560                ASSERT(len < sizeof(tmp));
1561
1562                di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1563                                path, BTRFS_FIRST_FREE_OBJECTID,
1564                                tmp, strlen(tmp), 0);
1565                btrfs_release_path(path);
1566                if (IS_ERR(di)) {
1567                        ret = PTR_ERR(di);
1568                        goto out;
1569                }
1570                if (di) {
1571                        /* not unique, try again */
1572                        idx++;
1573                        continue;
1574                }
1575
1576                if (!sctx->parent_root) {
1577                        /* unique */
1578                        ret = 0;
1579                        break;
1580                }
1581
1582                di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1583                                path, BTRFS_FIRST_FREE_OBJECTID,
1584                                tmp, strlen(tmp), 0);
1585                btrfs_release_path(path);
1586                if (IS_ERR(di)) {
1587                        ret = PTR_ERR(di);
1588                        goto out;
1589                }
1590                if (di) {
1591                        /* not unique, try again */
1592                        idx++;
1593                        continue;
1594                }
1595                /* unique */
1596                break;
1597        }
1598
1599        ret = fs_path_add(dest, tmp, strlen(tmp));
1600
1601out:
1602        btrfs_free_path(path);
1603        return ret;
1604}
1605
/*
 * State of an inode with respect to the send and parent roots, as computed by
 * get_cur_inode_state() based on where the inode exists (and with which
 * generation) relative to sctx->send_progress.
 */
enum inode_state {
        inode_state_no_change,
        inode_state_will_create,
        inode_state_did_create,
        inode_state_will_delete,
        inode_state_did_delete,
};
1613
/*
 * Classify inode (ino, gen): "left" is the send root, "right" is the parent
 * root. An inode present only in the send root is being created, one present
 * only in the parent root is being deleted; whether that already happened is
 * decided by comparing ino against sctx->send_progress. Returns one of
 * enum inode_state or a negative error (-ENOENT if no matching gen exists).
 */
static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
        int ret;
        int left_ret;
        int right_ret;
        u64 left_gen;
        u64 right_gen;

        /* Look up the inode in the send root; -ENOENT is a valid outcome. */
        ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
                        NULL, NULL);
        if (ret < 0 && ret != -ENOENT)
                goto out;
        left_ret = ret;

        /* Without a parent root (full send) the inode can't exist there. */
        if (!sctx->parent_root) {
                right_ret = -ENOENT;
        } else {
                ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
                                NULL, NULL, NULL, NULL);
                if (ret < 0 && ret != -ENOENT)
                        goto out;
                right_ret = ret;
        }

        if (!left_ret && !right_ret) {
                /* Exists in both roots: compare generations. */
                if (left_gen == gen && right_gen == gen) {
                        ret = inode_state_no_change;
                } else if (left_gen == gen) {
                        if (ino < sctx->send_progress)
                                ret = inode_state_did_create;
                        else
                                ret = inode_state_will_create;
                } else if (right_gen == gen) {
                        if (ino < sctx->send_progress)
                                ret = inode_state_did_delete;
                        else
                                ret = inode_state_will_delete;
                } else  {
                        ret = -ENOENT;
                }
        } else if (!left_ret) {
                /* Only in the send root: creation pending or done. */
                if (left_gen == gen) {
                        if (ino < sctx->send_progress)
                                ret = inode_state_did_create;
                        else
                                ret = inode_state_will_create;
                } else {
                        ret = -ENOENT;
                }
        } else if (!right_ret) {
                /* Only in the parent root: deletion pending or done. */
                if (right_gen == gen) {
                        if (ino < sctx->send_progress)
                                ret = inode_state_did_delete;
                        else
                                ret = inode_state_will_delete;
                } else {
                        ret = -ENOENT;
                }
        } else {
                ret = -ENOENT;
        }

out:
        return ret;
}
1679
1680static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1681{
1682        int ret;
1683
1684        ret = get_cur_inode_state(sctx, ino, gen);
1685        if (ret < 0)
1686                goto out;
1687
1688        if (ret == inode_state_no_change ||
1689            ret == inode_state_did_create ||
1690            ret == inode_state_will_delete)
1691                ret = 1;
1692        else
1693                ret = 0;
1694
1695out:
1696        return ret;
1697}
1698
1699/*
1700 * Helper function to lookup a dir item in a dir.
1701 */
1702static int lookup_dir_item_inode(struct btrfs_root *root,
1703                                 u64 dir, const char *name, int name_len,
1704                                 u64 *found_inode,
1705                                 u8 *found_type)
1706{
1707        int ret = 0;
1708        struct btrfs_dir_item *di;
1709        struct btrfs_key key;
1710        struct btrfs_path *path;
1711
1712        path = alloc_path_for_send();
1713        if (!path)
1714                return -ENOMEM;
1715
1716        di = btrfs_lookup_dir_item(NULL, root, path,
1717                        dir, name, name_len, 0);
1718        if (!di) {
1719                ret = -ENOENT;
1720                goto out;
1721        }
1722        if (IS_ERR(di)) {
1723                ret = PTR_ERR(di);
1724                goto out;
1725        }
1726        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1727        if (key.type == BTRFS_ROOT_ITEM_KEY) {
1728                ret = -ENOENT;
1729                goto out;
1730        }
1731        *found_inode = key.objectid;
1732        *found_type = btrfs_dir_type(path->nodes[0], di);
1733
1734out:
1735        btrfs_free_path(path);
1736        return ret;
1737}
1738
1739/*
1740 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1741 * generation of the parent dir and the name of the dir entry.
1742 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
                         u64 *dir, u64 *dir_gen, struct fs_path *name)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
        int len;
        u64 parent_dir;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        key.objectid = ino;
        key.type = BTRFS_INODE_REF_KEY;
        key.offset = 0;

        ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
        if (ret < 0)
                goto out;
        if (!ret)
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                path->slots[0]);
        /* The ref may be a regular ref or an extended ref (extref). */
        if (ret || found_key.objectid != ino ||
            (found_key.type != BTRFS_INODE_REF_KEY &&
             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
                ret = -ENOENT;
                goto out;
        }

        if (found_key.type == BTRFS_INODE_REF_KEY) {
                struct btrfs_inode_ref *iref;
                /* Regular ref: the name follows the header, parent is the key offset. */
                iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                      struct btrfs_inode_ref);
                len = btrfs_inode_ref_name_len(path->nodes[0], iref);
                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
                                                     (unsigned long)(iref + 1),
                                                     len);
                parent_dir = found_key.offset;
        } else {
                struct btrfs_inode_extref *extref;
                /* Extended ref: both name and parent are stored in the item. */
                extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                        struct btrfs_inode_extref);
                len = btrfs_inode_extref_name_len(path->nodes[0], extref);
                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
                                        (unsigned long)&extref->name, len);
                parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
        }
        if (ret < 0)
                goto out;
        btrfs_release_path(path);

        /* The parent generation lookup is optional: skipped if dir_gen is NULL. */
        if (dir_gen) {
                ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
                                     NULL, NULL, NULL);
                if (ret < 0)
                        goto out;
        }

        *dir = parent_dir;

out:
        btrfs_free_path(path);
        return ret;
}
1809
1810static int is_first_ref(struct btrfs_root *root,
1811                        u64 ino, u64 dir,
1812                        const char *name, int name_len)
1813{
1814        int ret;
1815        struct fs_path *tmp_name;
1816        u64 tmp_dir;
1817
1818        tmp_name = fs_path_alloc();
1819        if (!tmp_name)
1820                return -ENOMEM;
1821
1822        ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
1823        if (ret < 0)
1824                goto out;
1825
1826        if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1827                ret = 0;
1828                goto out;
1829        }
1830
1831        ret = !memcmp(tmp_name->start, name, name_len);
1832
1833out:
1834        fs_path_free(tmp_name);
1835        return ret;
1836}
1837
1838/*
1839 * Used by process_recorded_refs to determine if a new ref would overwrite an
1840 * already existing ref. In case it detects an overwrite, it returns the
1841 * inode/gen in who_ino/who_gen.
1842 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1843 * to make sure later references to the overwritten inode are possible.
1844 * Orphanizing is however only required for the first ref of an inode.
1845 * process_recorded_refs does an additional is_first_ref check to see if
1846 * orphanizing is really required.
1847 */
1848static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1849                              const char *name, int name_len,
1850                              u64 *who_ino, u64 *who_gen)
1851{
1852        int ret = 0;
1853        u64 gen;
1854        u64 other_inode = 0;
1855        u8 other_type = 0;
1856
1857        if (!sctx->parent_root)
1858                goto out;
1859
1860        ret = is_inode_existent(sctx, dir, dir_gen);
1861        if (ret <= 0)
1862                goto out;
1863
1864        /*
1865         * If we have a parent root we need to verify that the parent dir was
1866         * not deleted and then re-created, if it was then we have no overwrite
1867         * and we can just unlink this entry.
1868         */
1869        if (sctx->parent_root) {
1870                ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1871                                     NULL, NULL, NULL);
1872                if (ret < 0 && ret != -ENOENT)
1873                        goto out;
1874                if (ret) {
1875                        ret = 0;
1876                        goto out;
1877                }
1878                if (gen != dir_gen)
1879                        goto out;
1880        }
1881
1882        ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1883                        &other_inode, &other_type);
1884        if (ret < 0 && ret != -ENOENT)
1885                goto out;
1886        if (ret) {
1887                ret = 0;
1888                goto out;
1889        }
1890
1891        /*
1892         * Check if the overwritten ref was already processed. If yes, the ref
1893         * was already unlinked/moved, so we can safely assume that we will not
1894         * overwrite anything at this point in time.
1895         */
1896        if (other_inode > sctx->send_progress ||
1897            is_waiting_for_move(sctx, other_inode)) {
1898                ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1899                                who_gen, NULL, NULL, NULL, NULL);
1900                if (ret < 0)
1901                        goto out;
1902
1903                ret = 1;
1904                *who_ino = other_inode;
1905        } else {
1906                ret = 0;
1907        }
1908
1909out:
1910        return ret;
1911}
1912
1913/*
1914 * Checks if the ref was overwritten by an already processed inode. This is
1915 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1916 * thus the orphan name needs be used.
1917 * process_recorded_refs also uses it to avoid unlinking of refs that were
1918 * overwritten.
1919 */
1920static int did_overwrite_ref(struct send_ctx *sctx,
1921                            u64 dir, u64 dir_gen,
1922                            u64 ino, u64 ino_gen,
1923                            const char *name, int name_len)
1924{
1925        int ret = 0;
1926        u64 gen;
1927        u64 ow_inode;
1928        u8 other_type;
1929
1930        if (!sctx->parent_root)
1931                goto out;
1932
1933        ret = is_inode_existent(sctx, dir, dir_gen);
1934        if (ret <= 0)
1935                goto out;
1936
1937        /* check if the ref was overwritten by another ref */
1938        ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1939                        &ow_inode, &other_type);
1940        if (ret < 0 && ret != -ENOENT)
1941                goto out;
1942        if (ret) {
1943                /* was never and will never be overwritten */
1944                ret = 0;
1945                goto out;
1946        }
1947
1948        ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1949                        NULL, NULL);
1950        if (ret < 0)
1951                goto out;
1952
1953        if (ow_inode == ino && gen == ino_gen) {
1954                ret = 0;
1955                goto out;
1956        }
1957
1958        /*
1959         * We know that it is or will be overwritten. Check this now.
1960         * The current inode being processed might have been the one that caused
1961         * inode 'ino' to be orphanized, therefore check if ow_inode matches
1962         * the current inode being processed.
1963         */
1964        if ((ow_inode < sctx->send_progress) ||
1965            (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
1966             gen == sctx->cur_inode_gen))
1967                ret = 1;
1968        else
1969                ret = 0;
1970
1971out:
1972        return ret;
1973}
1974
1975/*
1976 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1977 * that got overwritten. This is used by process_recorded_refs to determine
1978 * if it has to use the path as returned by get_cur_path or the orphan name.
1979 */
1980static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1981{
1982        int ret = 0;
1983        struct fs_path *name = NULL;
1984        u64 dir;
1985        u64 dir_gen;
1986
1987        if (!sctx->parent_root)
1988                goto out;
1989
1990        name = fs_path_alloc();
1991        if (!name)
1992                return -ENOMEM;
1993
1994        ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1995        if (ret < 0)
1996                goto out;
1997
1998        ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1999                        name->start, fs_path_len(name));
2000
2001out:
2002        fs_path_free(name);
2003        return ret;
2004}
2005
2006/*
2007 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
2008 * so we need to do some special handling in case we have clashes. This function
2009 * takes care of this with the help of name_cache_entry::radix_list.
2010 * In case of error, nce is kfreed.
2011 */
2012static int name_cache_insert(struct send_ctx *sctx,
2013                             struct name_cache_entry *nce)
2014{
2015        int ret = 0;
2016        struct list_head *nce_head;
2017
2018        nce_head = radix_tree_lookup(&sctx->name_cache,
2019                        (unsigned long)nce->ino);
2020        if (!nce_head) {
2021                nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
2022                if (!nce_head) {
2023                        kfree(nce);
2024                        return -ENOMEM;
2025                }
2026                INIT_LIST_HEAD(nce_head);
2027
2028                ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
2029                if (ret < 0) {
2030                        kfree(nce_head);
2031                        kfree(nce);
2032                        return ret;
2033                }
2034        }
2035        list_add_tail(&nce->radix_list, nce_head);
2036        list_add_tail(&nce->list, &sctx->name_cache_list);
2037        sctx->name_cache_size++;
2038
2039        return ret;
2040}
2041
/*
 * Remove @nce from both the radix tree bucket list and the LRU list, and
 * free the bucket head once its last entry is gone.  The caller frees @nce.
 */
static void name_cache_delete(struct send_ctx *sctx,
                              struct name_cache_entry *nce)
{
        struct list_head *nce_head;

        /* Find the per-index bucket this entry was inserted into. */
        nce_head = radix_tree_lookup(&sctx->name_cache,
                        (unsigned long)nce->ino);
        if (!nce_head) {
                /* Should never happen; warn and leak rather than crash. */
                btrfs_err(sctx->send_root->fs_info,
              "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
                        nce->ino, sctx->name_cache_size);
        }

        list_del(&nce->radix_list);
        list_del(&nce->list);
        sctx->name_cache_size--;

        /*
         * We may not get to the final release of nce_head if the lookup fails
         */
        if (nce_head && list_empty(nce_head)) {
                radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
                kfree(nce_head);
        }
}
2067
2068static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
2069                                                    u64 ino, u64 gen)
2070{
2071        struct list_head *nce_head;
2072        struct name_cache_entry *cur;
2073
2074        nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
2075        if (!nce_head)
2076                return NULL;
2077
2078        list_for_each_entry(cur, nce_head, radix_list) {
2079                if (cur->ino == ino && cur->gen == gen)
2080                        return cur;
2081        }
2082        return NULL;
2083}
2084
2085/*
2086 * Removes the entry from the list and adds it back to the end. This marks the
2087 * entry as recently used so that name_cache_clean_unused does not remove it.
2088 */
2089static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
2090{
2091        list_del(&nce->list);
2092        list_add_tail(&nce->list, &sctx->name_cache_list);
2093}
2094
2095/*
2096 * Remove some entries from the beginning of name_cache_list.
2097 */
2098static void name_cache_clean_unused(struct send_ctx *sctx)
2099{
2100        struct name_cache_entry *nce;
2101
2102        if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
2103                return;
2104
2105        while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
2106                nce = list_entry(sctx->name_cache_list.next,
2107                                struct name_cache_entry, list);
2108                name_cache_delete(sctx, nce);
2109                kfree(nce);
2110        }
2111}
2112
2113static void name_cache_free(struct send_ctx *sctx)
2114{
2115        struct name_cache_entry *nce;
2116
2117        while (!list_empty(&sctx->name_cache_list)) {
2118                nce = list_entry(sctx->name_cache_list.next,
2119                                struct name_cache_entry, list);
2120                name_cache_delete(sctx, nce);
2121                kfree(nce);
2122        }
2123}
2124
2125/*
2126 * Used by get_cur_path for each ref up to the root.
2127 * Returns 0 if it succeeded.
2128 * Returns 1 if the inode is not existent or got overwritten. In that case, the
2129 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
2130 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
2131 * Returns <0 in case of error.
2132 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
                                     u64 ino, u64 gen,
                                     u64 *parent_ino,
                                     u64 *parent_gen,
                                     struct fs_path *dest)
{
        int ret;
        int nce_ret;
        struct name_cache_entry *nce = NULL;

        /*
         * First check if we already did a call to this function with the same
         * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
         * return the cached result.
         */
        nce = name_cache_search(sctx, ino, gen);
        if (nce) {
                if (ino < sctx->send_progress && nce->need_later_update) {
                        /*
                         * The inode got processed since the entry was cached,
                         * so the cached name may be stale: drop it and redo
                         * the lookup below.
                         */
                        name_cache_delete(sctx, nce);
                        kfree(nce);
                        nce = NULL;
                } else {
                        name_cache_used(sctx, nce);
                        *parent_ino = nce->parent_ino;
                        *parent_gen = nce->parent_gen;
                        ret = fs_path_add(dest, nce->name, nce->name_len);
                        if (ret < 0)
                                goto out;
                        /* nce->ret replays the original 0/1 result. */
                        ret = nce->ret;
                        goto out;
                }
        }

        /*
         * If the inode is not existent yet, add the orphan name and return 1.
         * This should only happen for the parent dir that we determine in
         * __record_new_ref
         */
        ret = is_inode_existent(sctx, ino, gen);
        if (ret < 0)
                goto out;

        if (!ret) {
                ret = gen_unique_name(sctx, ino, gen, dest);
                if (ret < 0)
                        goto out;
                /*
                 * ret == 1 signals "orphan name"; parent_ino/parent_gen are
                 * not set on this path (callers must not rely on them then).
                 */
                ret = 1;
                goto out_cache;
        }

        /*
         * Depending on whether the inode was already processed or not, use
         * send_root or parent_root for ref lookup.
         */
        if (ino < sctx->send_progress)
                ret = get_first_ref(sctx->send_root, ino,
                                    parent_ino, parent_gen, dest);
        else
                ret = get_first_ref(sctx->parent_root, ino,
                                    parent_ino, parent_gen, dest);
        if (ret < 0)
                goto out;

        /*
         * Check if the ref was overwritten by an inode's ref that was processed
         * earlier. If yes, treat as orphan and return 1.
         */
        ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
                        dest->start, dest->end - dest->start);
        if (ret < 0)
                goto out;
        if (ret) {
                /* Overwritten: replace the real name with the orphan name. */
                fs_path_reset(dest);
                ret = gen_unique_name(sctx, ino, gen, dest);
                if (ret < 0)
                        goto out;
                ret = 1;
        }

out_cache:
        /*
         * Store the result of the lookup in the name cache.
         * The allocation is sized for the trailing name buffer plus NUL,
         * which the strcpy() below fills from dest.
         */
        nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
        if (!nce) {
                ret = -ENOMEM;
                goto out;
        }

        nce->ino = ino;
        nce->gen = gen;
        nce->parent_ino = *parent_ino;
        nce->parent_gen = *parent_gen;
        nce->name_len = fs_path_len(dest);
        nce->ret = ret;
        strcpy(nce->name, dest->start);

        /*
         * Entries for not-yet-processed inodes may change once the inode is
         * processed; mark them for re-validation (see the check at the top).
         */
        if (ino < sctx->send_progress)
                nce->need_later_update = 0;
        else
                nce->need_later_update = 1;

        nce_ret = name_cache_insert(sctx, nce);
        if (nce_ret < 0)
                ret = nce_ret;
        name_cache_clean_unused(sctx);

out:
        return ret;
}
2243
2244/*
2245 * Magic happens here. This function returns the first ref to an inode as it
2246 * would look like while receiving the stream at this point in time.
2247 * We walk the path up to the root. For every inode in between, we check if it
2248 * was already processed/sent. If yes, we continue with the parent as found
2249 * in send_root. If not, we continue with the parent as found in parent_root.
2250 * If we encounter an inode that was deleted at this point in time, we use the
2251 * inodes "orphan" name instead of the real name and stop. Same with new inodes
2252 * that were not created yet and overwritten inodes/refs.
2253 *
 * When do we have orphan inodes:
2255 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
2257 *    inside which were not processed yet (pending for move/delete). If anyone
2258 *    tried to get the path to the dir items, it would get a path inside that
2259 *    orphan directory.
2260 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2261 *    of an unprocessed inode. If in that case the first ref would be
2262 *    overwritten, the overwritten inode gets "orphanized". Later when we
2263 *    process this overwritten inode, it is restored at a new place by moving
2264 *    the orphan inode.
2265 *
2266 * sctx->send_progress tells this function at which point in time receiving
2267 * would be.
2268 */
static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
                        struct fs_path *dest)
{
        int ret = 0;
        struct fs_path *name = NULL;
        u64 parent_inode = 0;
        u64 parent_gen = 0;
        int stop = 0;

        name = fs_path_alloc();
        if (!name) {
                ret = -ENOMEM;
                goto out;
        }

        /* Build the path leaf-first (reversed), unreverse once complete. */
        dest->reversed = 1;
        fs_path_reset(dest);

        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
                struct waiting_dir_move *wdm;

                fs_path_reset(name);

                /* Directory pending deletion: use its orphan name and stop. */
                if (is_waiting_for_rm(sctx, ino)) {
                        ret = gen_unique_name(sctx, ino, gen, name);
                        if (ret < 0)
                                goto out;
                        ret = fs_path_add_path(dest, name);
                        break;
                }

                wdm = get_waiting_dir_move(sctx, ino);
                if (wdm && wdm->orphanized) {
                        /* Orphanized ancestor: the path is rooted at its orphan name. */
                        ret = gen_unique_name(sctx, ino, gen, name);
                        stop = 1;
                } else if (wdm) {
                        /*
                         * Pending move: the receiver still sees the old name,
                         * so resolve it from the parent root.
                         */
                        ret = get_first_ref(sctx->parent_root, ino,
                                            &parent_inode, &parent_gen, name);
                } else {
                        ret = __get_cur_name_and_parent(sctx, ino, gen,
                                                        &parent_inode,
                                                        &parent_gen, name);
                        if (ret)
                                stop = 1;       /* orphan name (1) or error (<0) */
                }

                if (ret < 0)
                        goto out;

                ret = fs_path_add_path(dest, name);
                if (ret < 0)
                        goto out;

                /* Walk up to the parent directory. */
                ino = parent_inode;
                gen = parent_gen;
        }

out:
        fs_path_free(name);
        if (!ret)
                fs_path_unreverse(dest);
        return ret;
}
2332
2333/*
2334 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2335 */
static int send_subvol_begin(struct send_ctx *sctx)
{
        int ret;
        struct btrfs_root *send_root = sctx->send_root;
        struct btrfs_root *parent_root = sctx->parent_root;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_root_ref *ref;
        struct extent_buffer *leaf;
        char *name = NULL;
        int namelen;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /* NOTE(review): assumes root backref names never exceed
         * BTRFS_PATH_NAME_MAX - verify before the read_extent_buffer below. */
        name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
        if (!name) {
                btrfs_free_path(path);
                return -ENOMEM;
        }

        /* Find the ROOT_BACKREF item of the sent subvolume in the tree root. */
        key.objectid = send_root->objectid;
        key.type = BTRFS_ROOT_BACKREF_KEY;
        key.offset = 0;

        ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
                                &key, path, 1, 0);
        if (ret < 0)
                goto out;
        if (ret) {
                /* Nothing at or after the key: no backref, no name to send. */
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        if (key.type != BTRFS_ROOT_BACKREF_KEY ||
            key.objectid != send_root->objectid) {
                ret = -ENOENT;
                goto out;
        }
        /* The subvolume name is stored inline right after the ref item. */
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
        namelen = btrfs_root_ref_name_len(leaf, ref);
        read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
        btrfs_release_path(path);

        /* With a parent snapshot this is an incremental send (SNAPSHOT). */
        if (parent_root) {
                ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
                if (ret < 0)
                        goto out;
        } else {
                ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
                if (ret < 0)
                        goto out;
        }

        TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);

        /* Use received_uuid when set, otherwise the subvolume's own uuid. */
        if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
                TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
                            sctx->send_root->root_item.received_uuid);
        else
                TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
                            sctx->send_root->root_item.uuid);

        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
                    le64_to_cpu(sctx->send_root->root_item.ctransid));
        if (parent_root) {
                /* Same uuid preference for the clone source (parent). */
                if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
                        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
                                     parent_root->root_item.received_uuid);
                else
                        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
                                     parent_root->root_item.uuid);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
                            le64_to_cpu(sctx->parent_root->root_item.ctransid));
        }

        ret = send_cmd(sctx);

/* The TLV_PUT* macros jump here on failure. */
tlv_put_failure:
out:
        btrfs_free_path(path);
        kfree(name);
        return ret;
}
2423
2424static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2425{
2426        int ret = 0;
2427        struct fs_path *p;
2428
2429verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2430
2431        p = fs_path_alloc();
2432        if (!p)
2433                return -ENOMEM;
2434
2435        ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2436        if (ret < 0)
2437                goto out;
2438
2439        ret = get_cur_path(sctx, ino, gen, p);
2440        if (ret < 0)
2441                goto out;
2442        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2443        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2444
2445        ret = send_cmd(sctx);
2446
2447tlv_put_failure:
2448out:
2449        fs_path_free(p);
2450        return ret;
2451}
2452
2453static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2454{
2455        int ret = 0;
2456        struct fs_path *p;
2457
2458verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2459
2460        p = fs_path_alloc();
2461        if (!p)
2462                return -ENOMEM;
2463
2464        ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2465        if (ret < 0)
2466                goto out;
2467
2468        ret = get_cur_path(sctx, ino, gen, p);
2469        if (ret < 0)
2470                goto out;
2471        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2472        TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2473
2474        ret = send_cmd(sctx);
2475
2476tlv_put_failure:
2477out:
2478        fs_path_free(p);
2479        return ret;
2480}
2481
2482static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2483{
2484        int ret = 0;
2485        struct fs_path *p;
2486
2487verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2488
2489        p = fs_path_alloc();
2490        if (!p)
2491                return -ENOMEM;
2492
2493        ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2494        if (ret < 0)
2495                goto out;
2496
2497        ret = get_cur_path(sctx, ino, gen, p);
2498        if (ret < 0)
2499                goto out;
2500        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2501        TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2502        TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2503
2504        ret = send_cmd(sctx);
2505
2506tlv_put_failure:
2507out:
2508        fs_path_free(p);
2509        return ret;
2510}
2511
/* Send a UTIMES command carrying atime/mtime/ctime of @ino. */
static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
{
        int ret = 0;
        struct fs_path *p = NULL;
        struct btrfs_inode_item *ii;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
        struct btrfs_key key;
        int slot;

verbose_printk("btrfs: send_utimes %llu\n", ino);

        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;

        path = alloc_path_for_send();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        /* Read the inode item from the send root to get the timestamps. */
        key.objectid = ino;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
        if (ret > 0)
                ret = -ENOENT;  /* no exact match: the inode item must exist */
        if (ret < 0)
                goto out;

        eb = path->nodes[0];
        slot = path->slots[0];
        ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);

        ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
        if (ret < 0)
                goto out;

        ret = get_cur_path(sctx, ino, gen, p);
        if (ret < 0)
                goto out;
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
        /* TODO Add otime support when the otime patches get into upstream */

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        fs_path_free(p);
        btrfs_free_path(path);
        return ret;
}
2568
2569/*
2570 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2571 * a valid path yet because we did not process the refs yet. So, the inode
2572 * is created as orphan.
2573 */
static int send_create_inode(struct send_ctx *sctx, u64 ino)
{
        int ret = 0;
        struct fs_path *p;
        int cmd;
        u64 gen;
        u64 mode;
        u64 rdev;

verbose_printk("btrfs: send_create_inode %llu\n", ino);

        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;

        /* For the inode currently being processed the info is cached in sctx. */
        if (ino != sctx->cur_ino) {
                ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
                                     NULL, NULL, &rdev);
                if (ret < 0)
                        goto out;
        } else {
                gen = sctx->cur_inode_gen;
                mode = sctx->cur_inode_mode;
                rdev = sctx->cur_inode_rdev;
        }

        /* Map the inode type bits to the matching send command. */
        if (S_ISREG(mode)) {
                cmd = BTRFS_SEND_C_MKFILE;
        } else if (S_ISDIR(mode)) {
                cmd = BTRFS_SEND_C_MKDIR;
        } else if (S_ISLNK(mode)) {
                cmd = BTRFS_SEND_C_SYMLINK;
        } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
                cmd = BTRFS_SEND_C_MKNOD;
        } else if (S_ISFIFO(mode)) {
                cmd = BTRFS_SEND_C_MKFIFO;
        } else if (S_ISSOCK(mode)) {
                cmd = BTRFS_SEND_C_MKSOCK;
        } else {
                btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
                                (int)(mode & S_IFMT));
                ret = -ENOTSUPP;
                goto out;
        }

        ret = begin_cmd(sctx, cmd);
        if (ret < 0)
                goto out;

        /* Created under the orphan name; renamed once the refs are processed. */
        ret = gen_unique_name(sctx, ino, gen, p);
        if (ret < 0)
                goto out;

        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);

        if (S_ISLNK(mode)) {
                /* Symlinks additionally carry their target path. */
                fs_path_reset(p);
                ret = read_symlink(sctx->send_root, ino, p);
                if (ret < 0)
                        goto out;
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
        } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
                   S_ISFIFO(mode) || S_ISSOCK(mode)) {
                /* Special files additionally carry device number and mode. */
                TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
                TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
        }

        ret = send_cmd(sctx);
        if (ret < 0)
                goto out;


tlv_put_failure:
out:
        fs_path_free(p);
        return ret;
}
2652
2653/*
2654 * We need some special handling for inodes that get processed before the parent
2655 * directory got created. See process_recorded_refs for details.
2656 * This function does the check if we already created the dir out of order.
2657 */
/* Returns 1 if @dir was already created out of order, 0 if not, <0 on error. */
static int did_create_dir(struct send_ctx *sctx, u64 dir)
{
        int ret = 0;
        struct btrfs_path *path = NULL;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_key di_key;
        struct extent_buffer *eb;
        struct btrfs_dir_item *di;
        int slot;

        path = alloc_path_for_send();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        /* Iterate all DIR_INDEX items of @dir in the send root. */
        key.objectid = dir;
        key.type = BTRFS_DIR_INDEX_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                eb = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(eb)) {
                        /* Current leaf exhausted; advance to the next one. */
                        ret = btrfs_next_leaf(sctx->send_root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No more leaves: dir was not created yet. */
                                ret = 0;
                                break;
                        }
                        continue;
                }

                btrfs_item_key_to_cpu(eb, &found_key, slot);
                if (found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        /* Walked past the last DIR_INDEX item of @dir. */
                        ret = 0;
                        goto out;
                }

                di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
                btrfs_dir_item_key_to_cpu(eb, di, &di_key);

                /*
                 * An entry pointing to an already processed inode (and not to
                 * a subvolume root) proves the dir was created earlier, out
                 * of order.
                 */
                if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
                    di_key.objectid < sctx->send_progress) {
                        ret = 1;
                        goto out;
                }

                path->slots[0]++;
        }

out:
        btrfs_free_path(path);
        return ret;
}
2719
2720/*
2721 * Only creates the inode if it is:
2722 * 1. Not a directory
2723 * 2. Or a directory which was not created already due to out of order
2724 *    directories. See did_create_dir and process_recorded_refs for details.
2725 */
2726static int send_create_inode_if_needed(struct send_ctx *sctx)
2727{
2728        int ret;
2729
2730        if (S_ISDIR(sctx->cur_inode_mode)) {
2731                ret = did_create_dir(sctx, sctx->cur_ino);
2732                if (ret < 0)
2733                        goto out;
2734                if (ret) {
2735                        ret = 0;
2736                        goto out;
2737                }
2738        }
2739
2740        ret = send_create_inode(sctx, sctx->cur_ino);
2741        if (ret < 0)
2742                goto out;
2743
2744out:
2745        return ret;
2746}
2747
struct recorded_ref {
        struct list_head list;
        /* Points into full_path's buffer: path up to the last component. */
        char *dir_path;
        /* Points into full_path's buffer: last path component (basename). */
        char *name;
        struct fs_path *full_path;
        /* Inode number and generation of the parent directory. */
        u64 dir;
        u64 dir_gen;
        /* Lengths of dir_path and name (no terminating NUL accounted). */
        int dir_path_len;
        int name_len;
};
2758
2759/*
2760 * We need to process new refs before deleted refs, but compare_tree gives us
2761 * everything mixed. So we first record all refs and later process them.
2762 * This function is a helper to record one ref.
2763 */
2764static int __record_ref(struct list_head *head, u64 dir,
2765                      u64 dir_gen, struct fs_path *path)
2766{
2767        struct recorded_ref *ref;
2768
2769        ref = kmalloc(sizeof(*ref), GFP_KERNEL);
2770        if (!ref)
2771                return -ENOMEM;
2772
2773        ref->dir = dir;
2774        ref->dir_gen = dir_gen;
2775        ref->full_path = path;
2776
2777        ref->name = (char *)kbasename(ref->full_path->start);
2778        ref->name_len = ref->full_path->end - ref->name;
2779        ref->dir_path = ref->full_path->start;
2780        if (ref->name == ref->full_path->start)
2781                ref->dir_path_len = 0;
2782        else
2783                ref->dir_path_len = ref->full_path->end -
2784                                ref->full_path->start - 1 - ref->name_len;
2785
2786        list_add_tail(&ref->list, head);
2787        return 0;
2788}
2789
2790static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2791{
2792        struct recorded_ref *new;
2793
2794        new = kmalloc(sizeof(*ref), GFP_KERNEL);
2795        if (!new)
2796                return -ENOMEM;
2797
2798        new->dir = ref->dir;
2799        new->dir_gen = ref->dir_gen;
2800        new->full_path = NULL;
2801        INIT_LIST_HEAD(&new->list);
2802        list_add_tail(&new->list, list);
2803        return 0;
2804}
2805
2806static void __free_recorded_refs(struct list_head *head)
2807{
2808        struct recorded_ref *cur;
2809
2810        while (!list_empty(head)) {
2811                cur = list_entry(head->next, struct recorded_ref, list);
2812                fs_path_free(cur->full_path);
2813                list_del(&cur->list);
2814                kfree(cur);
2815        }
2816}
2817
/* Release all refs recorded for the current inode (both new and deleted). */
static void free_recorded_refs(struct send_ctx *sctx)
{
        __free_recorded_refs(&sctx->new_refs);
        __free_recorded_refs(&sctx->deleted_refs);
}
2823
2824/*
2825 * Renames/moves a file/dir to its orphan name. Used when the first
2826 * ref of an unprocessed inode gets overwritten and for all non empty
2827 * directories.
2828 */
2829static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2830                          struct fs_path *path)
2831{
2832        int ret;
2833        struct fs_path *orphan;
2834
2835        orphan = fs_path_alloc();
2836        if (!orphan)
2837                return -ENOMEM;
2838
2839        ret = gen_unique_name(sctx, ino, gen, orphan);
2840        if (ret < 0)
2841                goto out;
2842
2843        ret = send_rename(sctx, path, orphan);
2844
2845out:
2846        fs_path_free(orphan);
2847        return ret;
2848}
2849
2850static struct orphan_dir_info *
2851add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2852{
2853        struct rb_node **p = &sctx->orphan_dirs.rb_node;
2854        struct rb_node *parent = NULL;
2855        struct orphan_dir_info *entry, *odi;
2856
2857        odi = kmalloc(sizeof(*odi), GFP_KERNEL);
2858        if (!odi)
2859                return ERR_PTR(-ENOMEM);
2860        odi->ino = dir_ino;
2861        odi->gen = 0;
2862
2863        while (*p) {
2864                parent = *p;
2865                entry = rb_entry(parent, struct orphan_dir_info, node);
2866                if (dir_ino < entry->ino) {
2867                        p = &(*p)->rb_left;
2868                } else if (dir_ino > entry->ino) {
2869                        p = &(*p)->rb_right;
2870                } else {
2871                        kfree(odi);
2872                        return entry;
2873                }
2874        }
2875
2876        rb_link_node(&odi->node, parent, p);
2877        rb_insert_color(&odi->node, &sctx->orphan_dirs);
2878        return odi;
2879}
2880
2881static struct orphan_dir_info *
2882get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2883{
2884        struct rb_node *n = sctx->orphan_dirs.rb_node;
2885        struct orphan_dir_info *entry;
2886
2887        while (n) {
2888                entry = rb_entry(n, struct orphan_dir_info, node);
2889                if (dir_ino < entry->ino)
2890                        n = n->rb_left;
2891                else if (dir_ino > entry->ino)
2892                        n = n->rb_right;
2893                else
2894                        return entry;
2895        }
2896        return NULL;
2897}
2898
2899static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2900{
2901        struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2902
2903        return odi != NULL;
2904}
2905
2906static void free_orphan_dir_info(struct send_ctx *sctx,
2907                                 struct orphan_dir_info *odi)
2908{
2909        if (!odi)
2910                return;
2911        rb_erase(&odi->node, &sctx->orphan_dirs);
2912        kfree(odi);
2913}
2914
2915/*
2916 * Returns 1 if a directory can be removed at this point in time.
2917 * We check this by iterating all dir items and checking if the inode behind
2918 * the dir item was already processed.
2919 */
2920static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2921                     u64 send_progress)
2922{
2923        int ret = 0;
2924        struct btrfs_root *root = sctx->parent_root;
2925        struct btrfs_path *path;
2926        struct btrfs_key key;
2927        struct btrfs_key found_key;
2928        struct btrfs_key loc;
2929        struct btrfs_dir_item *di;
2930
2931        /*
2932         * Don't try to rmdir the top/root subvolume dir.
2933         */
2934        if (dir == BTRFS_FIRST_FREE_OBJECTID)
2935                return 0;
2936
2937        path = alloc_path_for_send();
2938        if (!path)
2939                return -ENOMEM;
2940
2941        key.objectid = dir;
2942        key.type = BTRFS_DIR_INDEX_KEY;
2943        key.offset = 0;
2944        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2945        if (ret < 0)
2946                goto out;
2947
2948        while (1) {
2949                struct waiting_dir_move *dm;
2950
2951                if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2952                        ret = btrfs_next_leaf(root, path);
2953                        if (ret < 0)
2954                                goto out;
2955                        else if (ret > 0)
2956                                break;
2957                        continue;
2958                }
2959                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2960                                      path->slots[0]);
2961                if (found_key.objectid != key.objectid ||
2962                    found_key.type != key.type)
2963                        break;
2964
2965                di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2966                                struct btrfs_dir_item);
2967                btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2968
2969                dm = get_waiting_dir_move(sctx, loc.objectid);
2970                if (dm) {
2971                        struct orphan_dir_info *odi;
2972
2973                        odi = add_orphan_dir_info(sctx, dir);
2974                        if (IS_ERR(odi)) {
2975                                ret = PTR_ERR(odi);
2976                                goto out;
2977                        }
2978                        odi->gen = dir_gen;
2979                        dm->rmdir_ino = dir;
2980                        ret = 0;
2981                        goto out;
2982                }
2983
2984                if (loc.objectid > send_progress) {
2985                        struct orphan_dir_info *odi;
2986
2987                        odi = get_orphan_dir_info(sctx, dir);
2988                        free_orphan_dir_info(sctx, odi);
2989                        ret = 0;
2990                        goto out;
2991                }
2992
2993                path->slots[0]++;
2994        }
2995
2996        ret = 1;
2997
2998out:
2999        btrfs_free_path(path);
3000        return ret;
3001}
3002
3003static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
3004{
3005        struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
3006
3007        return entry != NULL;
3008}
3009
3010static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
3011{
3012        struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
3013        struct rb_node *parent = NULL;
3014        struct waiting_dir_move *entry, *dm;
3015
3016        dm = kmalloc(sizeof(*dm), GFP_KERNEL);
3017        if (!dm)
3018                return -ENOMEM;
3019        dm->ino = ino;
3020        dm->rmdir_ino = 0;
3021        dm->orphanized = orphanized;
3022
3023        while (*p) {
3024                parent = *p;
3025                entry = rb_entry(parent, struct waiting_dir_move, node);
3026                if (ino < entry->ino) {
3027                        p = &(*p)->rb_left;
3028                } else if (ino > entry->ino) {
3029                        p = &(*p)->rb_right;
3030                } else {
3031                        kfree(dm);
3032                        return -EEXIST;
3033                }
3034        }
3035
3036        rb_link_node(&dm->node, parent, p);
3037        rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
3038        return 0;
3039}
3040
3041static struct waiting_dir_move *
3042get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
3043{
3044        struct rb_node *n = sctx->waiting_dir_moves.rb_node;
3045        struct waiting_dir_move *entry;
3046
3047        while (n) {
3048                entry = rb_entry(n, struct waiting_dir_move, node);
3049                if (ino < entry->ino)
3050                        n = n->rb_left;
3051                else if (ino > entry->ino)
3052                        n = n->rb_right;
3053                else
3054                        return entry;
3055        }
3056        return NULL;
3057}
3058
3059static void free_waiting_dir_move(struct send_ctx *sctx,
3060                                  struct waiting_dir_move *dm)
3061{
3062        if (!dm)
3063                return;
3064        rb_erase(&dm->node, &sctx->waiting_dir_moves);
3065        kfree(dm);
3066}
3067
/*
 * Record that inode @ino (a directory) can not be renamed/moved to its
 * final location until @parent_ino was processed; the move is executed
 * later by apply_dir_move(). The refs in @new_refs and @deleted_refs are
 * duplicated (dir info only, see dup_ref()) so the utimes of all affected
 * parent dirs can be updated once the delayed move happens.
 *
 * Returns 0 on success and < 0 on error (including -EEXIST if @ino
 * already has a waiting dir move entry).
 */
static int add_pending_dir_move(struct send_ctx *sctx,
                                u64 ino,
                                u64 ino_gen,
                                u64 parent_ino,
                                struct list_head *new_refs,
                                struct list_head *deleted_refs,
                                const bool is_orphan)
{
        struct rb_node **p = &sctx->pending_dir_moves.rb_node;
        struct rb_node *parent = NULL;
        struct pending_dir_move *entry = NULL, *pm;
        struct recorded_ref *cur;
        int exists = 0;
        int ret;

        pm = kmalloc(sizeof(*pm), GFP_KERNEL);
        if (!pm)
                return -ENOMEM;
        pm->parent_ino = parent_ino;
        pm->ino = ino;
        pm->gen = ino_gen;
        INIT_LIST_HEAD(&pm->list);
        INIT_LIST_HEAD(&pm->update_refs);
        RB_CLEAR_NODE(&pm->node);

        /* Find the rb-tree slot; the tree is keyed by parent inode number. */
        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct pending_dir_move, node);
                if (parent_ino < entry->parent_ino) {
                        p = &(*p)->rb_left;
                } else if (parent_ino > entry->parent_ino) {
                        p = &(*p)->rb_right;
                } else {
                        exists = 1;
                        break;
                }
        }

        /* Keep only dir/dir_gen of each ref, for later utimes updates. */
        list_for_each_entry(cur, deleted_refs, list) {
                ret = dup_ref(cur, &pm->update_refs);
                if (ret < 0)
                        goto out;
        }
        list_for_each_entry(cur, new_refs, list) {
                ret = dup_ref(cur, &pm->update_refs);
                if (ret < 0)
                        goto out;
        }

        /* Mark @ino so path building knows its rename is delayed. */
        ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
        if (ret)
                goto out;

        if (exists) {
                /* Another child of @parent_ino already waits: chain onto it. */
                list_add_tail(&pm->list, &entry->list);
        } else {
                rb_link_node(&pm->node, parent, p);
                rb_insert_color(&pm->node, &sctx->pending_dir_moves);
        }
        ret = 0;
out:
        if (ret) {
                __free_recorded_refs(&pm->update_refs);
                kfree(pm);
        }
        return ret;
}
3135
3136static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3137                                                      u64 parent_ino)
3138{
3139        struct rb_node *n = sctx->pending_dir_moves.rb_node;
3140        struct pending_dir_move *entry;
3141
3142        while (n) {
3143                entry = rb_entry(n, struct pending_dir_move, node);
3144                if (parent_ino < entry->parent_ino)
3145                        n = n->rb_left;
3146                else if (parent_ino > entry->parent_ino)
3147                        n = n->rb_right;
3148                else
3149                        return entry;
3150        }
3151        return NULL;
3152}
3153
/*
 * Walk up from @ino towards the subvolume root and detect whether the
 * resulting path would contain a loop caused by pending dir moves (i.e.
 * @ino would become its own ancestor once all delayed renames applied).
 *
 * Returns 1 if a loop was found, with *ancestor_ino set to the first
 * ancestor encountered that is waiting for a move; 0 if no loop; < 0 on
 * error. @name is used purely as scratch space for the ref lookups.
 */
static int path_loop(struct send_ctx *sctx, struct fs_path *name,
                     u64 ino, u64 gen, u64 *ancestor_ino)
{
        int ret = 0;
        u64 parent_inode = 0;
        u64 parent_gen = 0;
        u64 start_ino = ino;

        *ancestor_ino = 0;
        while (ino != BTRFS_FIRST_FREE_OBJECTID) {
                fs_path_reset(name);

                if (is_waiting_for_rm(sctx, ino))
                        break;
                if (is_waiting_for_move(sctx, ino)) {
                        /* Rename delayed: current location is in parent root. */
                        if (*ancestor_ino == 0)
                                *ancestor_ino = ino;
                        ret = get_first_ref(sctx->parent_root, ino,
                                            &parent_inode, &parent_gen, name);
                } else {
                        ret = __get_cur_name_and_parent(sctx, ino, gen,
                                                        &parent_inode,
                                                        &parent_gen, name);
                        if (ret > 0) {
                                ret = 0;
                                break;
                        }
                }
                if (ret < 0)
                        break;
                if (parent_inode == start_ino) {
                        /* Came back to where we started: loop detected. */
                        ret = 1;
                        if (*ancestor_ino == 0)
                                *ancestor_ino = ino;
                        break;
                }
                ino = parent_inode;
                gen = parent_gen;
        }
        return ret;
}
3195
/*
 * Execute the delayed rename/move of directory pm->ino to its final
 * location. If a delayed rmdir of an ancestor was recorded on the inode's
 * waiting_dir_move entry, attempt that rmdir afterwards. Finally updates
 * the utimes of the moved dir and of all old/new parent dirs recorded in
 * pm->update_refs.
 *
 * If moving the dir now would still create a path loop (see path_loop()),
 * the move is re-delayed by registering a new pending move against the
 * conflicting ancestor, carrying over any pending rmdir.
 */
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
        struct fs_path *from_path = NULL;
        struct fs_path *to_path = NULL;
        struct fs_path *name = NULL;
        u64 orig_progress = sctx->send_progress;
        struct recorded_ref *cur;
        u64 parent_ino, parent_gen;
        struct waiting_dir_move *dm = NULL;
        u64 rmdir_ino = 0;
        u64 ancestor;
        bool is_orphan;
        int ret;

        name = fs_path_alloc();
        from_path = fs_path_alloc();
        if (!name || !from_path) {
                ret = -ENOMEM;
                goto out;
        }

        dm = get_waiting_dir_move(sctx, pm->ino);
        ASSERT(dm);
        rmdir_ino = dm->rmdir_ino;
        is_orphan = dm->orphanized;
        free_waiting_dir_move(sctx, dm);

        /* Build the current (pre-move) path of the inode. */
        if (is_orphan) {
                ret = gen_unique_name(sctx, pm->ino,
                                      pm->gen, from_path);
        } else {
                ret = get_first_ref(sctx->parent_root, pm->ino,
                                    &parent_ino, &parent_gen, name);
                if (ret < 0)
                        goto out;
                ret = get_cur_path(sctx, parent_ino, parent_gen,
                                   from_path);
                if (ret < 0)
                        goto out;
                ret = fs_path_add_path(from_path, name);
        }
        if (ret < 0)
                goto out;

        sctx->send_progress = sctx->cur_ino + 1;
        ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
        if (ret < 0)
                goto out;
        if (ret) {
                /*
                 * Moving now would create a path loop: delay again until
                 * the conflicting ancestor got moved, preserving any
                 * pending rmdir on the fresh waiting_dir_move entry.
                 */
                LIST_HEAD(deleted_refs);
                ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
                ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
                                           &pm->update_refs, &deleted_refs,
                                           is_orphan);
                if (ret < 0)
                        goto out;
                if (rmdir_ino) {
                        dm = get_waiting_dir_move(sctx, pm->ino);
                        ASSERT(dm);
                        dm->rmdir_ino = rmdir_ino;
                }
                goto out;
        }
        /* Reuse the scratch fs_path to build the destination path. */
        fs_path_reset(name);
        to_path = name;
        name = NULL;
        ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
        if (ret < 0)
                goto out;

        ret = send_rename(sctx, from_path, to_path);
        if (ret < 0)
                goto out;

        if (rmdir_ino) {
                /* An ancestor dir was waiting on this move for its rmdir. */
                struct orphan_dir_info *odi;

                odi = get_orphan_dir_info(sctx, rmdir_ino);
                if (!odi) {
                        /* already deleted */
                        goto finish;
                }
                ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
                if (ret < 0)
                        goto out;
                if (!ret)
                        goto finish;

                name = fs_path_alloc();
                if (!name) {
                        ret = -ENOMEM;
                        goto out;
                }
                ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
                if (ret < 0)
                        goto out;
                ret = send_rmdir(sctx, name);
                if (ret < 0)
                        goto out;
                free_orphan_dir_info(sctx, odi);
        }

finish:
        ret = send_utimes(sctx, pm->ino, pm->gen);
        if (ret < 0)
                goto out;

        /*
         * After rename/move, need to update the utimes of both new parent(s)
         * and old parent(s).
         */
        list_for_each_entry(cur, &pm->update_refs, list) {
                /*
                 * The parent inode might have been deleted in the send snapshot
                 */
                ret = get_inode_info(sctx->send_root, cur->dir, NULL,
                                     NULL, NULL, NULL, NULL, NULL);
                if (ret == -ENOENT) {
                        ret = 0;
                        continue;
                }
                if (ret < 0)
                        goto out;

                ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                if (ret < 0)
                        goto out;
        }

out:
        fs_path_free(name);
        fs_path_free(from_path);
        fs_path_free(to_path);
        sctx->send_progress = orig_progress;

        return ret;
}
3333
/*
 * Unlink @m from wherever it is linked — either the pending_dir_moves
 * rb-tree or another entry's sibling chain — and free it with its refs.
 */
static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
{
        if (!list_empty(&m->list))
                list_del(&m->list);
        if (!RB_EMPTY_NODE(&m->node))
                rb_erase(&m->node, &sctx->pending_dir_moves);
        __free_recorded_refs(&m->update_refs);
        kfree(m);
}
3343
/*
 * Append @moves and all sibling entries chained on its ->list to the tail
 * of @stack, with @moves itself queued first.
 *
 * NOTE(review): entries sitting in the pending_dir_moves rb-tree are not
 * erased here; the caller relies on free_pending_move() doing that when
 * each entry is popped — confirm no entry can be found via the tree while
 * it is on the stack.
 */
static void tail_append_pending_moves(struct pending_dir_move *moves,
                                      struct list_head *stack)
{
        if (list_empty(&moves->list)) {
                list_add_tail(&moves->list, stack);
        } else {
                /*
                 * @moves heads a chain of siblings: detach the chain first
                 * so @moves can be queued ahead of them.
                 */
                LIST_HEAD(list);
                list_splice_init(&moves->list, &list);
                list_add_tail(&moves->list, stack);
                list_splice_tail(&list, stack);
        }
}
3356
/*
 * Execute all dir moves that were waiting for the inode we just finished
 * processing (sctx->cur_ino), and transitively all moves that were in
 * turn waiting on those, iteratively via a work stack.
 *
 * Returns 0 on success and < 0 on error; on error the remaining stacked
 * moves are discarded.
 */
static int apply_children_dir_moves(struct send_ctx *sctx)
{
        struct pending_dir_move *pm;
        struct list_head stack;
        u64 parent_ino = sctx->cur_ino;
        int ret = 0;

        pm = get_pending_dir_moves(sctx, parent_ino);
        if (!pm)
                return 0;

        INIT_LIST_HEAD(&stack);
        tail_append_pending_moves(pm, &stack);

        while (!list_empty(&stack)) {
                pm = list_first_entry(&stack, struct pending_dir_move, list);
                parent_ino = pm->ino;
                ret = apply_dir_move(sctx, pm);
                free_pending_move(sctx, pm);
                if (ret)
                        goto out;
                /* Moves that waited on the one just applied become ready. */
                pm = get_pending_dir_moves(sctx, parent_ino);
                if (pm)
                        tail_append_pending_moves(pm, &stack);
        }
        return 0;

out:
        while (!list_empty(&stack)) {
                pm = list_first_entry(&stack, struct pending_dir_move, list);
                free_pending_move(sctx, pm);
        }
        return ret;
}
3391
3392/*
3393 * We might need to delay a directory rename even when no ancestor directory
3394 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3395 * renamed. This happens when we rename a directory to the old name (the name
3396 * in the parent root) of some other unrelated directory that got its rename
3397 * delayed due to some ancestor with higher number that got renamed.
3398 *
3399 * Example:
3400 *
3401 * Parent snapshot:
3402 * .                                       (ino 256)
3403 * |---- a/                                (ino 257)
3404 * |     |---- file                        (ino 260)
3405 * |
3406 * |---- b/                                (ino 258)
3407 * |---- c/                                (ino 259)
3408 *
3409 * Send snapshot:
3410 * .                                       (ino 256)
3411 * |---- a/                                (ino 258)
3412 * |---- x/                                (ino 259)
3413 *       |---- y/                          (ino 257)
3414 *             |----- file                 (ino 260)
3415 *
3416 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
3417 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3418 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3419 * must issue is:
3420 *
3421 * 1 - rename 259 from 'c' to 'x'
3422 * 2 - rename 257 from 'a' to 'x/y'
3423 * 3 - rename 258 from 'b' to 'a'
3424 *
3425 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3426 * be done right away and < 0 on error.
3427 */
static int wait_for_dest_dir_move(struct send_ctx *sctx,
                                  struct recorded_ref *parent_ref,
                                  const bool is_orphan)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_key di_key;
        struct btrfs_dir_item *di;
        u64 left_gen;
        u64 right_gen;
        int ret = 0;
        struct waiting_dir_move *wdm;

        /* Nothing can be pending if no dir move was ever delayed. */
        if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
                return 0;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        /*
         * Look in the parent snapshot for a dentry with the same name, in
         * the same directory, as the one sctx->cur_ino is being renamed to.
         */
        key.objectid = parent_ref->dir;
        key.type = BTRFS_DIR_ITEM_KEY;
        key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);

        ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
        if (ret < 0) {
                goto out;
        } else if (ret > 0) {
                /* No dir item with that name hash: no conflict possible. */
                ret = 0;
                goto out;
        }

        di = btrfs_match_dir_item_name(sctx->parent_root, path,
                                       parent_ref->name, parent_ref->name_len);
        if (!di) {
                /* Hash collision only, no dentry with that exact name. */
                ret = 0;
                goto out;
        }
        /*
         * di_key.objectid has the number of the inode that has a dentry in the
         * parent directory with the same name that sctx->cur_ino is being
         * renamed to. We need to check if that inode is in the send root as
         * well and if it is currently marked as an inode with a pending rename,
         * if it is, we need to delay the rename of sctx->cur_ino as well, so
         * that it happens after that other inode is renamed.
         */
        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
        if (di_key.type != BTRFS_INODE_ITEM_KEY) {
                ret = 0;
                goto out;
        }

        ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
                             &left_gen, NULL, NULL, NULL, NULL);
        if (ret < 0)
                goto out;
        ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
                             &right_gen, NULL, NULL, NULL, NULL);
        if (ret < 0) {
                /* Inode gone in the send snapshot: nothing to wait for. */
                if (ret == -ENOENT)
                        ret = 0;
                goto out;
        }

        /* Different inode, no need to delay the rename of sctx->cur_ino */
        if (right_gen != left_gen) {
                ret = 0;
                goto out;
        }

        wdm = get_waiting_dir_move(sctx, di_key.objectid);
        if (wdm && !wdm->orphanized) {
                /* The conflicting inode's rename is delayed: wait for it. */
                ret = add_pending_dir_move(sctx,
                                           sctx->cur_ino,
                                           sctx->cur_inode_gen,
                                           di_key.objectid,
                                           &sctx->new_refs,
                                           &sctx->deleted_refs,
                                           is_orphan);
                if (!ret)
                        ret = 1;
        }
out:
        btrfs_free_path(path);
        return ret;
}
3514
3515/*
3516 * Check if ino ino1 is an ancestor of inode ino2 in the given root.
3517 * Return 1 if true, 0 if false and < 0 on error.
3518 */
3519static int is_ancestor(struct btrfs_root *root,
3520                       const u64 ino1,
3521                       const u64 ino1_gen,
3522                       const u64 ino2,
3523                       struct fs_path *fs_path)
3524{
3525        u64 ino = ino2;
3526
3527        while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3528                int ret;
3529                u64 parent;
3530                u64 parent_gen;
3531
3532                fs_path_reset(fs_path);
3533                ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3534                if (ret < 0) {
3535                        if (ret == -ENOENT && ino == ino2)
3536                                ret = 0;
3537                        return ret;
3538                }
3539                if (parent == ino1)
3540                        return parent_gen == ino1_gen ? 1 : 0;
3541                ino = parent;
3542        }
3543        return 0;
3544}
3545
3546static int wait_for_parent_move(struct send_ctx *sctx,
3547                                struct recorded_ref *parent_ref,
3548                                const bool is_orphan)
3549{
3550        int ret = 0;
3551        u64 ino = parent_ref->dir;
3552        u64 parent_ino_before, parent_ino_after;
3553        struct fs_path *path_before = NULL;
3554        struct fs_path *path_after = NULL;
3555        int len1, len2;
3556
3557        path_after = fs_path_alloc();
3558        path_before = fs_path_alloc();
3559        if (!path_after || !path_before) {
3560                ret = -ENOMEM;
3561                goto out;
3562        }
3563
3564        /*
3565         * Our current directory inode may not yet be renamed/moved because some
3566         * ancestor (immediate or not) has to be renamed/moved first. So find if
3567         * such ancestor exists and make sure our own rename/move happens after
3568         * that ancestor is processed to avoid path build infinite loops (done
3569         * at get_cur_path()).
3570         */
3571        while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3572                if (is_waiting_for_move(sctx, ino)) {
3573                        /*
3574                         * If the current inode is an ancestor of ino in the
3575                         * parent root, we need to delay the rename of the
                         * current inode, otherwise don't delay the rename
3577                         * because we can end up with a circular dependency
3578                         * of renames, resulting in some directories never
3579                         * getting the respective rename operations issued in
3580                         * the send stream or getting into infinite path build
3581                         * loops.
3582                         */
3583                        ret = is_ancestor(sctx->parent_root,
3584                                          sctx->cur_ino, sctx->cur_inode_gen,
3585                                          ino, path_before);
3586                        if (ret)
3587                                break;
3588                }
3589
3590                fs_path_reset(path_before);
3591                fs_path_reset(path_after);
3592
3593                ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3594                                    NULL, path_after);
3595                if (ret < 0)
3596                        goto out;
3597                ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3598                                    NULL, path_before);
3599                if (ret < 0 && ret != -ENOENT) {
3600                        goto out;
3601                } else if (ret == -ENOENT) {
3602                        ret = 0;
3603                        break;
3604                }
3605
3606                len1 = fs_path_len(path_before);
3607                len2 = fs_path_len(path_after);
3608                if (ino > sctx->cur_ino &&
3609                    (parent_ino_before != parent_ino_after || len1 != len2 ||
3610                     memcmp(path_before->start, path_after->start, len1))) {
3611                        ret = 1;
3612                        break;
3613                }
3614                ino = parent_ino_after;
3615        }
3616
3617out:
3618        fs_path_free(path_before);
3619        fs_path_free(path_after);
3620
3621        if (ret == 1) {
3622                ret = add_pending_dir_move(sctx,
3623                                           sctx->cur_ino,
3624                                           sctx->cur_inode_gen,
3625                                           ino,
3626                                           &sctx->new_refs,
3627                                           &sctx->deleted_refs,
3628                                           is_orphan);
3629                if (!ret)
3630                        ret = 1;
3631        }
3632
3633        return ret;
3634}
3635
/*
 * This does all the move/link/unlink/rmdir magic.
 *
 * Processes all new and deleted refs recorded for sctx->cur_ino: creates
 * missing parent directories, orphanizes inodes whose first ref would be
 * overwritten, issues the rename/link/unlink/rmdir commands to the send
 * stream and finally updates utimes or rmdirs the affected parent dirs.
 * Sets *pending_move to 1 when the rename of the current inode had to be
 * delayed until some other directory is processed.
 */
static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
{
	int ret = 0;
	struct recorded_ref *cur;
	struct recorded_ref *cur2;
	struct list_head check_dirs;
	struct fs_path *valid_path = NULL;
	u64 ow_inode = 0;
	u64 ow_gen;
	int did_overwrite = 0;
	int is_orphan = 0;
	u64 last_dir_ino_rm = 0;
	bool can_rename = true;

verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);

	/*
	 * This should never happen as the root dir always has the same ref
	 * which is always '..'
	 */
	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
	INIT_LIST_HEAD(&check_dirs);

	valid_path = fs_path_alloc();
	if (!valid_path) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * First, check if the first ref of the current inode was overwritten
	 * before. If yes, we know that the current inode was already orphanized
	 * and thus use the orphan name. If not, we can use get_cur_path to
	 * get the path of the first ref as it would look like while receiving
	 * at this point in time.
	 * New inodes are always orphan at the beginning, so force to use the
	 * orphan name in this case.
	 * The first ref is stored in valid_path and will be updated if it
	 * gets moved around.
	 */
	if (!sctx->cur_inode_new) {
		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
				sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
		if (ret)
			did_overwrite = 1;
	}
	if (sctx->cur_inode_new || did_overwrite) {
		ret = gen_unique_name(sctx, sctx->cur_ino,
				sctx->cur_inode_gen, valid_path);
		if (ret < 0)
			goto out;
		is_orphan = 1;
	} else {
		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				valid_path);
		if (ret < 0)
			goto out;
	}

	list_for_each_entry(cur, &sctx->new_refs, list) {
		/*
		 * We may have refs where the parent directory does not exist
		 * yet. This happens if the parent directories inum is higher
		 * than the current inum. To handle this case, we create the
		 * parent directory out of order. But we need to check if this
		 * did already happen before due to other refs in the same dir.
		 */
		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
		if (ret < 0)
			goto out;
		if (ret == inode_state_will_create) {
			ret = 0;
			/*
			 * First check if any of the current inodes refs did
			 * already create the dir.
			 */
			list_for_each_entry(cur2, &sctx->new_refs, list) {
				if (cur == cur2)
					break;
				if (cur2->dir == cur->dir) {
					ret = 1;
					break;
				}
			}

			/*
			 * If that did not happen, check if a previous inode
			 * did already create the dir.
			 */
			if (!ret)
				ret = did_create_dir(sctx, cur->dir);
			if (ret < 0)
				goto out;
			if (!ret) {
				ret = send_create_inode(sctx, cur->dir);
				if (ret < 0)
					goto out;
			}
		}

		/*
		 * Check if this new ref would overwrite the first ref of
		 * another unprocessed inode. If yes, orphanize the
		 * overwritten inode. If we find an overwritten ref that is
		 * not the first ref, simply unlink it.
		 */
		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
				cur->name, cur->name_len,
				&ow_inode, &ow_gen);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = is_first_ref(sctx->parent_root,
					   ow_inode, cur->dir, cur->name,
					   cur->name_len);
			if (ret < 0)
				goto out;
			if (ret) {
				struct name_cache_entry *nce;
				struct waiting_dir_move *wdm;

				ret = orphanize_inode(sctx, ow_inode, ow_gen,
						cur->full_path);
				if (ret < 0)
					goto out;

				/*
				 * If ow_inode has its rename operation delayed
				 * make sure that its orphanized name is used in
				 * the source path when performing its rename
				 * operation.
				 */
				if (is_waiting_for_move(sctx, ow_inode)) {
					wdm = get_waiting_dir_move(sctx,
								   ow_inode);
					ASSERT(wdm);
					wdm->orphanized = true;
				}

				/*
				 * Make sure we clear our orphanized inode's
				 * name from the name cache. This is because the
				 * inode ow_inode might be an ancestor of some
				 * other inode that will be orphanized as well
				 * later and has an inode number greater than
				 * sctx->send_progress. We need to prevent
				 * future name lookups from using the old name
				 * and get instead the orphan name.
				 */
				nce = name_cache_search(sctx, ow_inode, ow_gen);
				if (nce) {
					name_cache_delete(sctx, nce);
					kfree(nce);
				}

				/*
				 * ow_inode might currently be an ancestor of
				 * cur_ino, therefore compute valid_path (the
				 * current path of cur_ino) again because it
				 * might contain the pre-orphanization name of
				 * ow_inode, which is no longer valid.
				 */
				fs_path_reset(valid_path);
				ret = get_cur_path(sctx, sctx->cur_ino,
					   sctx->cur_inode_gen, valid_path);
				if (ret < 0)
					goto out;
			} else {
				ret = send_unlink(sctx, cur->full_path);
				if (ret < 0)
					goto out;
			}
		}

		/* Delay the rename if the destination directory must move first. */
		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
			ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				can_rename = false;
				*pending_move = 1;
			}
		}

		/* Delay the rename if some ancestor must be renamed first. */
		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
		    can_rename) {
			ret = wait_for_parent_move(sctx, cur, is_orphan);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				can_rename = false;
				*pending_move = 1;
			}
		}

		/*
		 * link/move the ref to the new place. If we have an orphan
		 * inode, move it and update valid_path. If not, link or move
		 * it depending on the inode mode.
		 */
		if (is_orphan && can_rename) {
			ret = send_rename(sctx, valid_path, cur->full_path);
			if (ret < 0)
				goto out;
			is_orphan = 0;
			ret = fs_path_copy(valid_path, cur->full_path);
			if (ret < 0)
				goto out;
		} else if (can_rename) {
			if (S_ISDIR(sctx->cur_inode_mode)) {
				/*
				 * Dirs can't be linked, so move it. For moved
				 * dirs, we always have one new and one deleted
				 * ref. The deleted ref is ignored later.
				 */
				ret = send_rename(sctx, valid_path,
						  cur->full_path);
				if (!ret)
					ret = fs_path_copy(valid_path,
							   cur->full_path);
				if (ret < 0)
					goto out;
			} else {
				ret = send_link(sctx, cur->full_path,
						valid_path);
				if (ret < 0)
					goto out;
			}
		}
		ret = dup_ref(cur, &check_dirs);
		if (ret < 0)
			goto out;
	}

	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
		/*
		 * Check if we can already rmdir the directory. If not,
		 * orphanize it. For every dir item inside that gets deleted
		 * later, we do this check again and rmdir it then if possible.
		 * See the use of check_dirs for more details.
		 */
		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				sctx->cur_ino);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = send_rmdir(sctx, valid_path);
			if (ret < 0)
				goto out;
		} else if (!is_orphan) {
			ret = orphanize_inode(sctx, sctx->cur_ino,
					sctx->cur_inode_gen, valid_path);
			if (ret < 0)
				goto out;
			is_orphan = 1;
		}

		list_for_each_entry(cur, &sctx->deleted_refs, list) {
			ret = dup_ref(cur, &check_dirs);
			if (ret < 0)
				goto out;
		}
	} else if (S_ISDIR(sctx->cur_inode_mode) &&
		   !list_empty(&sctx->deleted_refs)) {
		/*
		 * We have a moved dir. Add the old parent to check_dirs
		 */
		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
				list);
		ret = dup_ref(cur, &check_dirs);
		if (ret < 0)
			goto out;
	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
		/*
		 * We have a non dir inode. Go through all deleted refs and
		 * unlink them if they were not already overwritten by other
		 * inodes.
		 */
		list_for_each_entry(cur, &sctx->deleted_refs, list) {
			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
					sctx->cur_ino, sctx->cur_inode_gen,
					cur->name, cur->name_len);
			if (ret < 0)
				goto out;
			if (!ret) {
				ret = send_unlink(sctx, cur->full_path);
				if (ret < 0)
					goto out;
			}
			ret = dup_ref(cur, &check_dirs);
			if (ret < 0)
				goto out;
		}
		/*
		 * If the inode is still orphan, unlink the orphan. This may
		 * happen when a previous inode did overwrite the first ref
		 * of this inode and no new refs were added for the current
		 * inode. Unlinking does not mean that the inode is deleted in
		 * all cases. There may still be links to this inode in other
		 * places.
		 */
		if (is_orphan) {
			ret = send_unlink(sctx, valid_path);
			if (ret < 0)
				goto out;
		}
	}

	/*
	 * We did collect all parent dirs where cur_inode was once located. We
	 * now go through all these dirs and check if they are pending for
	 * deletion and if it's finally possible to perform the rmdir now.
	 * We also update the inode stats of the parent dirs here.
	 */
	list_for_each_entry(cur, &check_dirs, list) {
		/*
		 * In case we had refs into dirs that were not processed yet,
		 * we don't need to do the utime and rmdir logic for these dirs.
		 * The dir will be processed later.
		 */
		if (cur->dir > sctx->cur_ino)
			continue;

		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
		if (ret < 0)
			goto out;

		if (ret == inode_state_did_create ||
		    ret == inode_state_no_change) {
			/* TODO delayed utimes */
			ret = send_utimes(sctx, cur->dir, cur->dir_gen);
			if (ret < 0)
				goto out;
		} else if (ret == inode_state_did_delete &&
			   cur->dir != last_dir_ino_rm) {
			ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
					sctx->cur_ino);
			if (ret < 0)
				goto out;
			if (ret) {
				ret = get_cur_path(sctx, cur->dir,
						   cur->dir_gen, valid_path);
				if (ret < 0)
					goto out;
				ret = send_rmdir(sctx, valid_path);
				if (ret < 0)
					goto out;
				last_dir_ino_rm = cur->dir;
			}
		}
	}

	ret = 0;

out:
	__free_recorded_refs(&check_dirs);
	free_recorded_refs(sctx);
	fs_path_free(valid_path);
	return ret;
}
4001
4002static int record_ref(struct btrfs_root *root, int num, u64 dir, int index,
4003                      struct fs_path *name, void *ctx, struct list_head *refs)
4004{
4005        int ret = 0;
4006        struct send_ctx *sctx = ctx;
4007        struct fs_path *p;
4008        u64 gen;
4009
4010        p = fs_path_alloc();
4011        if (!p)
4012                return -ENOMEM;
4013
4014        ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
4015                        NULL, NULL);
4016        if (ret < 0)
4017                goto out;
4018
4019        ret = get_cur_path(sctx, dir, gen, p);
4020        if (ret < 0)
4021                goto out;
4022        ret = fs_path_add_path(p, name);
4023        if (ret < 0)
4024                goto out;
4025
4026        ret = __record_ref(refs, dir, gen, p);
4027
4028out:
4029        if (ret)
4030                fs_path_free(p);
4031        return ret;
4032}
4033
4034static int __record_new_ref(int num, u64 dir, int index,
4035                            struct fs_path *name,
4036                            void *ctx)
4037{
4038        struct send_ctx *sctx = ctx;
4039        return record_ref(sctx->send_root, num, dir, index, name,
4040                          ctx, &sctx->new_refs);
4041}
4042
4043
4044static int __record_deleted_ref(int num, u64 dir, int index,
4045                                struct fs_path *name,
4046                                void *ctx)
4047{
4048        struct send_ctx *sctx = ctx;
4049        return record_ref(sctx->parent_root, num, dir, index, name,
4050                          ctx, &sctx->deleted_refs);
4051}
4052
4053static int record_new_ref(struct send_ctx *sctx)
4054{
4055        int ret;
4056
4057        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4058                                sctx->cmp_key, 0, __record_new_ref, sctx);
4059        if (ret < 0)
4060                goto out;
4061        ret = 0;
4062
4063out:
4064        return ret;
4065}
4066
4067static int record_deleted_ref(struct send_ctx *sctx)
4068{
4069        int ret;
4070
4071        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4072                                sctx->cmp_key, 0, __record_deleted_ref, sctx);
4073        if (ret < 0)
4074                goto out;
4075        ret = 0;
4076
4077out:
4078        return ret;
4079}
4080
/* Search context passed to __find_iref() through iterate_inode_ref(). */
struct find_ref_ctx {
	u64 dir;			/* parent directory objectid to match */
	u64 dir_gen;			/* generation the parent dir must have */
	struct btrfs_root *root;	/* root used to look up dir's generation */
	struct fs_path *name;		/* ref name to match */
	int found_idx;			/* index of matching ref, -1 if none */
};
4088
4089static int __find_iref(int num, u64 dir, int index,
4090                       struct fs_path *name,
4091                       void *ctx_)
4092{
4093        struct find_ref_ctx *ctx = ctx_;
4094        u64 dir_gen;
4095        int ret;
4096
4097        if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
4098            strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
4099                /*
4100                 * To avoid doing extra lookups we'll only do this if everything
4101                 * else matches.
4102                 */
4103                ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
4104                                     NULL, NULL, NULL);
4105                if (ret)
4106                        return ret;
4107                if (dir_gen != ctx->dir_gen)
4108                        return 0;
4109                ctx->found_idx = num;
4110                return 1;
4111        }
4112        return 0;
4113}
4114
4115static int find_iref(struct btrfs_root *root,
4116                     struct btrfs_path *path,
4117                     struct btrfs_key *key,
4118                     u64 dir, u64 dir_gen, struct fs_path *name)
4119{
4120        int ret;
4121        struct find_ref_ctx ctx;
4122
4123        ctx.dir = dir;
4124        ctx.name = name;
4125        ctx.dir_gen = dir_gen;
4126        ctx.found_idx = -1;
4127        ctx.root = root;
4128
4129        ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
4130        if (ret < 0)
4131                return ret;
4132
4133        if (ctx.found_idx == -1)
4134                return -ENOENT;
4135
4136        return ctx.found_idx;
4137}
4138
4139static int __record_changed_new_ref(int num, u64 dir, int index,
4140                                    struct fs_path *name,
4141                                    void *ctx)
4142{
4143        u64 dir_gen;
4144        int ret;
4145        struct send_ctx *sctx = ctx;
4146
4147        ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
4148                             NULL, NULL, NULL);
4149        if (ret)
4150                return ret;
4151
4152        ret = find_iref(sctx->parent_root, sctx->right_path,
4153                        sctx->cmp_key, dir, dir_gen, name);
4154        if (ret == -ENOENT)
4155                ret = __record_new_ref(num, dir, index, name, sctx);
4156        else if (ret > 0)
4157                ret = 0;
4158
4159        return ret;
4160}
4161
4162static int __record_changed_deleted_ref(int num, u64 dir, int index,
4163                                        struct fs_path *name,
4164                                        void *ctx)
4165{
4166        u64 dir_gen;
4167        int ret;
4168        struct send_ctx *sctx = ctx;
4169
4170        ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
4171                             NULL, NULL, NULL);
4172        if (ret)
4173                return ret;
4174
4175        ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
4176                        dir, dir_gen, name);
4177        if (ret == -ENOENT)
4178                ret = __record_deleted_ref(num, dir, index, name, sctx);
4179        else if (ret > 0)
4180                ret = 0;
4181
4182        return ret;
4183}
4184
4185static int record_changed_ref(struct send_ctx *sctx)
4186{
4187        int ret = 0;
4188
4189        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4190                        sctx->cmp_key, 0, __record_changed_new_ref, sctx);
4191        if (ret < 0)
4192                goto out;
4193        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4194                        sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
4195        if (ret < 0)
4196                goto out;
4197        ret = 0;
4198
4199out:
4200        return ret;
4201}
4202
/*
 * Record and process all refs at once. Needed when an inode changes the
 * generation number, which means that it was deleted and recreated.
 */
static int process_all_refs(struct send_ctx *sctx,
			    enum btrfs_compare_tree_result cmd)
{
	int ret;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	int slot;
	iterate_inode_ref_t cb;
	int pending_move = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* Pick the root and record callback matching the compare result. */
	if (cmd == BTRFS_COMPARE_TREE_NEW) {
		root = sctx->send_root;
		cb = __record_new_ref;
	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
		root = sctx->parent_root;
		cb = __record_deleted_ref;
	} else {
		btrfs_err(sctx->send_root->fs_info,
				"Wrong command %d in process_all_refs", cmd);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Walk all INODE_REF and INODE_EXTREF items of the inode; EXTREF
	 * items sort right after the REF items of the same objectid.
	 */
	key.objectid = sctx->cmp_key->objectid;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		eb = path->nodes[0];
		slot = path->slots[0];
		/* Advance to the next leaf when this one is exhausted. */
		if (slot >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(eb, &found_key, slot);

		if (found_key.objectid != key.objectid ||
		    (found_key.type != BTRFS_INODE_REF_KEY &&
		     found_key.type != BTRFS_INODE_EXTREF_KEY))
			break;

		ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
		if (ret < 0)
			goto out;

		path->slots[0]++;
	}
	btrfs_release_path(path);

	/*
	 * We don't actually care about pending_move as we are simply
	 * re-creating this inode and will be rename'ing it into place once we
	 * rename the parent directory.
	 */
	ret = process_recorded_refs(sctx, &pending_move);
out:
	btrfs_free_path(path);
	return ret;
}
4281
/*
 * Emit a SET_XATTR command (path, xattr name, xattr data) to the send
 * stream. Note: the TLV_PUT* macros jump to the tlv_put_failure label on
 * error, with ret already set by the macro.
 */
static int send_set_xattr(struct send_ctx *sctx,
			  struct fs_path *path,
			  const char *name, int name_len,
			  const char *data, int data_len)
{
	int ret = 0;

	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
4303
/*
 * Emit a REMOVE_XATTR command (path, xattr name) to the send stream.
 * Note: the TLV_PUT* macros jump to the tlv_put_failure label on error,
 * with ret already set by the macro.
 */
static int send_remove_xattr(struct send_ctx *sctx,
			  struct fs_path *path,
			  const char *name, int name_len)
{
	int ret = 0;

	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
4323
4324static int __process_new_xattr(int num, struct btrfs_key *di_key,
4325                               const char *name, int name_len,
4326                               const char *data, int data_len,
4327                               u8 type, void *ctx)
4328{
4329        int ret;
4330        struct send_ctx *sctx = ctx;
4331        struct fs_path *p;
4332        posix_acl_xattr_header dummy_acl;
4333
4334        p = fs_path_alloc();
4335        if (!p)
4336                return -ENOMEM;
4337
4338        /*
4339         * This hack is needed because empty acls are stored as zero byte
4340         * data in xattrs. Problem with that is, that receiving these zero byte
4341         * acls will fail later. To fix this, we send a dummy acl list that
4342         * only contains the version number and no entries.
4343         */
4344        if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
4345            !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
4346                if (data_len == 0) {
4347                        dummy_acl.a_version =
4348                                        cpu_to_le32(POSIX_ACL_XATTR_VERSION);
4349                        data = (char *)&dummy_acl;
4350                        data_len = sizeof(dummy_acl);
4351                }
4352        }
4353
4354        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4355        if (ret < 0)
4356                goto out;
4357
4358        ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
4359
4360out:
4361        fs_path_free(p);
4362        return ret;
4363}
4364
4365static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
4366                                   const char *name, int name_len,
4367                                   const char *data, int data_len,
4368                                   u8 type, void *ctx)
4369{
4370        int ret;
4371        struct send_ctx *sctx = ctx;
4372        struct fs_path *p;
4373
4374        p = fs_path_alloc();
4375        if (!p)
4376                return -ENOMEM;
4377
4378        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4379        if (ret < 0)
4380                goto out;
4381
4382        ret = send_remove_xattr(sctx, p, name, name_len);
4383
4384out:
4385        fs_path_free(p);
4386        return ret;
4387}
4388
4389static int process_new_xattr(struct send_ctx *sctx)
4390{
4391        int ret = 0;
4392
4393        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4394                               sctx->cmp_key, __process_new_xattr, sctx);
4395
4396        return ret;
4397}
4398
4399static int process_deleted_xattr(struct send_ctx *sctx)
4400{
4401        int ret;
4402
4403        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4404                               sctx->cmp_key, __process_deleted_xattr, sctx);
4405
4406        return ret;
4407}
4408
/*
 * Context handed to __find_xattr() while scanning a dir item for an xattr
 * with a given name.
 */
struct find_xattr_ctx {
        const char *name;       /* name to search for (not NUL terminated) */
        int name_len;
        int found_idx;          /* iteration index of the match, -1 if none */
        char *found_data;       /* kmemdup'ed copy of the matching value */
        int found_data_len;
};
4416
4417static int __find_xattr(int num, struct btrfs_key *di_key,
4418                        const char *name, int name_len,
4419                        const char *data, int data_len,
4420                        u8 type, void *vctx)
4421{
4422        struct find_xattr_ctx *ctx = vctx;
4423
4424        if (name_len == ctx->name_len &&
4425            strncmp(name, ctx->name, name_len) == 0) {
4426                ctx->found_idx = num;
4427                ctx->found_data_len = data_len;
4428                ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
4429                if (!ctx->found_data)
4430                        return -ENOMEM;
4431                return 1;
4432        }
4433        return 0;
4434}
4435
4436static int find_xattr(struct btrfs_root *root,
4437                      struct btrfs_path *path,
4438                      struct btrfs_key *key,
4439                      const char *name, int name_len,
4440                      char **data, int *data_len)
4441{
4442        int ret;
4443        struct find_xattr_ctx ctx;
4444
4445        ctx.name = name;
4446        ctx.name_len = name_len;
4447        ctx.found_idx = -1;
4448        ctx.found_data = NULL;
4449        ctx.found_data_len = 0;
4450
4451        ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
4452        if (ret < 0)
4453                return ret;
4454
4455        if (ctx.found_idx == -1)
4456                return -ENOENT;
4457        if (data) {
4458                *data = ctx.found_data;
4459                *data_len = ctx.found_data_len;
4460        } else {
4461                kfree(ctx.found_data);
4462        }
4463        return ctx.found_idx;
4464}
4465
4466
4467static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4468                                       const char *name, int name_len,
4469                                       const char *data, int data_len,
4470                                       u8 type, void *ctx)
4471{
4472        int ret;
4473        struct send_ctx *sctx = ctx;
4474        char *found_data = NULL;
4475        int found_data_len  = 0;
4476
4477        ret = find_xattr(sctx->parent_root, sctx->right_path,
4478                         sctx->cmp_key, name, name_len, &found_data,
4479                         &found_data_len);
4480        if (ret == -ENOENT) {
4481                ret = __process_new_xattr(num, di_key, name, name_len, data,
4482                                data_len, type, ctx);
4483        } else if (ret >= 0) {
4484                if (data_len != found_data_len ||
4485                    memcmp(data, found_data, data_len)) {
4486                        ret = __process_new_xattr(num, di_key, name, name_len,
4487                                        data, data_len, type, ctx);
4488                } else {
4489                        ret = 0;
4490                }
4491        }
4492
4493        kfree(found_data);
4494        return ret;
4495}
4496
4497static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4498                                           const char *name, int name_len,
4499                                           const char *data, int data_len,
4500                                           u8 type, void *ctx)
4501{
4502        int ret;
4503        struct send_ctx *sctx = ctx;
4504
4505        ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4506                         name, name_len, NULL, NULL);
4507        if (ret == -ENOENT)
4508                ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4509                                data_len, type, ctx);
4510        else if (ret >= 0)
4511                ret = 0;
4512
4513        return ret;
4514}
4515
4516static int process_changed_xattr(struct send_ctx *sctx)
4517{
4518        int ret = 0;
4519
4520        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4521                        sctx->cmp_key, __process_changed_new_xattr, sctx);
4522        if (ret < 0)
4523                goto out;
4524        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4525                        sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4526
4527out:
4528        return ret;
4529}
4530
/*
 * Send set-xattr commands for every xattr item of the current inode in the
 * send root, walking all BTRFS_XATTR_ITEM_KEY items of that inode.
 */
static int process_all_new_xattrs(struct send_ctx *sctx)
{
        int ret;
        struct btrfs_root *root;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *eb;
        int slot;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        root = sctx->send_root;

        /* Position at the inode's first xattr item (offset 0). */
        key.objectid = sctx->cmp_key->objectid;
        key.type = BTRFS_XATTR_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                eb = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(eb)) {
                        /* Walked off the current leaf, move to the next. */
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No more leaves, we are done. */
                                ret = 0;
                                break;
                        }
                        continue;
                }

                btrfs_item_key_to_cpu(eb, &found_key, slot);
                /* Stop once we leave this inode's xattr items. */
                if (found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        ret = 0;
                        goto out;
                }

                ret = iterate_dir_item(root, path, &found_key,
                                       __process_new_xattr, sctx);
                if (ret < 0)
                        goto out;

                path->slots[0]++;
        }

out:
        btrfs_free_path(path);
        return ret;
}
4587
4588static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4589{
4590        struct btrfs_root *root = sctx->send_root;
4591        struct btrfs_fs_info *fs_info = root->fs_info;
4592        struct inode *inode;
4593        struct page *page;
4594        char *addr;
4595        struct btrfs_key key;
4596        pgoff_t index = offset >> PAGE_SHIFT;
4597        pgoff_t last_index;
4598        unsigned pg_offset = offset & ~PAGE_MASK;
4599        ssize_t ret = 0;
4600
4601        key.objectid = sctx->cur_ino;
4602        key.type = BTRFS_INODE_ITEM_KEY;
4603        key.offset = 0;
4604
4605        inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4606        if (IS_ERR(inode))
4607                return PTR_ERR(inode);
4608
4609        if (offset + len > i_size_read(inode)) {
4610                if (offset > i_size_read(inode))
4611                        len = 0;
4612                else
4613                        len = offset - i_size_read(inode);
4614        }
4615        if (len == 0)
4616                goto out;
4617
4618        last_index = (offset + len - 1) >> PAGE_SHIFT;
4619
4620        /* initial readahead */
4621        memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4622        file_ra_state_init(&sctx->ra, inode->i_mapping);
4623        btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index,
4624                       last_index - index + 1);
4625
4626        while (index <= last_index) {
4627                unsigned cur_len = min_t(unsigned, len,
4628                                         PAGE_SIZE - pg_offset);
4629                page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
4630                if (!page) {
4631                        ret = -ENOMEM;
4632                        break;
4633                }
4634
4635                if (!PageUptodate(page)) {
4636                        btrfs_readpage(NULL, page);
4637                        lock_page(page);
4638                        if (!PageUptodate(page)) {
4639                                unlock_page(page);
4640                                put_page(page);
4641                                ret = -EIO;
4642                                break;
4643                        }
4644                }
4645
4646                addr = kmap(page);
4647                memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4648                kunmap(page);
4649                unlock_page(page);
4650                put_page(page);
4651                index++;
4652                pg_offset = 0;
4653                len -= cur_len;
4654                ret += cur_len;
4655        }
4656out:
4657        iput(inode);
4658        return ret;
4659}
4660
4661/*
4662 * Read some bytes from the current inode/file and send a write command to
4663 * user space.
4664 */
4665static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4666{
4667        int ret = 0;
4668        struct fs_path *p;
4669        ssize_t num_read = 0;
4670
4671        p = fs_path_alloc();
4672        if (!p)
4673                return -ENOMEM;
4674
4675verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
4676
4677        num_read = fill_read_buf(sctx, offset, len);
4678        if (num_read <= 0) {
4679                if (num_read < 0)
4680                        ret = num_read;
4681                goto out;
4682        }
4683
4684        ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4685        if (ret < 0)
4686                goto out;
4687
4688        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4689        if (ret < 0)
4690                goto out;
4691
4692        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4693        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4694        TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4695
4696        ret = send_cmd(sctx);
4697
4698tlv_put_failure:
4699out:
4700        fs_path_free(p);
4701        if (ret < 0)
4702                return ret;
4703        return num_read;
4704}
4705
4706/*
4707 * Send a clone command to user space.
4708 */
4709static int send_clone(struct send_ctx *sctx,
4710                      u64 offset, u32 len,
4711                      struct clone_root *clone_root)
4712{
4713        int ret = 0;
4714        struct fs_path *p;
4715        u64 gen;
4716
4717verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
4718               "clone_inode=%llu, clone_offset=%llu\n", offset, len,
4719                clone_root->root->objectid, clone_root->ino,
4720                clone_root->offset);
4721
4722        p = fs_path_alloc();
4723        if (!p)
4724                return -ENOMEM;
4725
4726        ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4727        if (ret < 0)
4728                goto out;
4729
4730        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4731        if (ret < 0)
4732                goto out;
4733
4734        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4735        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4736        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4737
4738        if (clone_root->root == sctx->send_root) {
4739                ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4740                                &gen, NULL, NULL, NULL, NULL);
4741                if (ret < 0)
4742                        goto out;
4743                ret = get_cur_path(sctx, clone_root->ino, gen, p);
4744        } else {
4745                ret = get_inode_path(clone_root->root, clone_root->ino, p);
4746        }
4747        if (ret < 0)
4748                goto out;
4749
4750        /*
4751         * If the parent we're using has a received_uuid set then use that as
4752         * our clone source as that is what we will look for when doing a
4753         * receive.
4754         *
4755         * This covers the case that we create a snapshot off of a received
4756         * subvolume and then use that as the parent and try to receive on a
4757         * different host.
4758         */
4759        if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
4760                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4761                             clone_root->root->root_item.received_uuid);
4762        else
4763                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4764                             clone_root->root->root_item.uuid);
4765        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4766                    le64_to_cpu(clone_root->root->root_item.ctransid));
4767        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4768        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4769                        clone_root->offset);
4770
4771        ret = send_cmd(sctx);
4772
4773tlv_put_failure:
4774out:
4775        fs_path_free(p);
4776        return ret;
4777}
4778
4779/*
4780 * Send an update extent command to user space.
4781 */
4782static int send_update_extent(struct send_ctx *sctx,
4783                              u64 offset, u32 len)
4784{
4785        int ret = 0;
4786        struct fs_path *p;
4787
4788        p = fs_path_alloc();
4789        if (!p)
4790                return -ENOMEM;
4791
4792        ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
4793        if (ret < 0)
4794                goto out;
4795
4796        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4797        if (ret < 0)
4798                goto out;
4799
4800        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4801        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4802        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
4803
4804        ret = send_cmd(sctx);
4805
4806tlv_put_failure:
4807out:
4808        fs_path_free(p);
4809        return ret;
4810}
4811
/*
 * Send zero-filled write commands for the range starting at
 * sctx->cur_inode_last_extent up to @end, in chunks of at most
 * BTRFS_SEND_READ_SIZE bytes each, using the zeroed read_buf as payload.
 */
static int send_hole(struct send_ctx *sctx, u64 end)
{
        struct fs_path *p = NULL;
        u64 offset = sctx->cur_inode_last_extent;
        u64 len;
        int ret = 0;

        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;
        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
        if (ret < 0)
                goto tlv_put_failure;
        /* read_buf is reused as a block of zeroes for every chunk. */
        memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
        while (offset < end) {
                len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);

                ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
                if (ret < 0)
                        break;
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
                ret = send_cmd(sctx);
                if (ret < 0)
                        break;
                offset += len;
        }
tlv_put_failure:
        fs_path_free(p);
        return ret;
}
4844
4845static int send_extent_data(struct send_ctx *sctx,
4846                            const u64 offset,
4847                            const u64 len)
4848{
4849        u64 sent = 0;
4850
4851        if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
4852                return send_update_extent(sctx, offset, len);
4853
4854        while (sent < len) {
4855                u64 size = len - sent;
4856                int ret;
4857
4858                if (size > BTRFS_SEND_READ_SIZE)
4859                        size = BTRFS_SEND_READ_SIZE;
4860                ret = send_write(sctx, offset + sent, size);
4861                if (ret < 0)
4862                        return ret;
4863                if (!ret)
4864                        break;
4865                sent += ret;
4866        }
4867        return 0;
4868}
4869
/*
 * Clone the range [@offset, @offset + @len) of the current inode from the
 * clone source described by @clone_root, falling back to plain writes
 * (send_extent_data()) for any parts of the source range that no longer
 * refer to the same extent (@disk_byte/@data_offset) or that are holes.
 */
static int clone_range(struct send_ctx *sctx,
                       struct clone_root *clone_root,
                       const u64 disk_byte,
                       u64 data_offset,
                       u64 offset,
                       u64 len)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        int ret;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        /*
         * We can't send a clone operation for the entire range if we find
         * extent items in the respective range in the source file that
         * refer to different extents or if we find holes.
         * So check for that and do a mix of clone and regular write/copy
         * operations if needed.
         *
         * Example:
         *
         * mkfs.btrfs -f /dev/sda
         * mount /dev/sda /mnt
         * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
         * cp --reflink=always /mnt/foo /mnt/bar
         * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
         * btrfs subvolume snapshot -r /mnt /mnt/snap
         *
         * If when we send the snapshot and we are processing file bar (which
         * has a higher inode number than foo) we blindly send a clone operation
         * for the [0, 100K[ range from foo to bar, the receiver ends up getting
         * a file bar that matches the content of file foo - iow, doesn't match
         * the content from bar in the original filesystem.
         */
        key.objectid = clone_root->ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = clone_root->offset;
        ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        /*
         * No exact match: the previous item may be an extent that still
         * covers clone_root->offset, so step back one slot to include it.
         */
        if (ret > 0 && path->slots[0] > 0) {
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
                if (key.objectid == clone_root->ino &&
                    key.type == BTRFS_EXTENT_DATA_KEY)
                        path->slots[0]--;
        }

        /* Walk the source's extent items that overlap the requested range. */
        while (true) {
                struct extent_buffer *leaf = path->nodes[0];
                int slot = path->slots[0];
                struct btrfs_file_extent_item *ei;
                u8 type;
                u64 ext_len;
                u64 clone_len;

                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(clone_root->root, path);
                        if (ret < 0)
                                goto out;
                        else if (ret > 0)
                                break;
                        continue;
                }

                btrfs_item_key_to_cpu(leaf, &key, slot);

                /*
                 * We might have an implicit trailing hole (NO_HOLES feature
                 * enabled). We deal with it after leaving this loop.
                 */
                if (key.objectid != clone_root->ino ||
                    key.type != BTRFS_EXTENT_DATA_KEY)
                        break;

                ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
                type = btrfs_file_extent_type(leaf, ei);
                if (type == BTRFS_FILE_EXTENT_INLINE) {
                        ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
                        ext_len = PAGE_ALIGN(ext_len);
                } else {
                        ext_len = btrfs_file_extent_num_bytes(leaf, ei);
                }

                /* Skip extents that end before the range of interest. */
                if (key.offset + ext_len <= clone_root->offset)
                        goto next;

                if (key.offset > clone_root->offset) {
                        /* Implicit hole, NO_HOLES feature enabled. */
                        u64 hole_len = key.offset - clone_root->offset;

                        if (hole_len > len)
                                hole_len = len;
                        ret = send_extent_data(sctx, offset, hole_len);
                        if (ret < 0)
                                goto out;

                        len -= hole_len;
                        if (len == 0)
                                break;
                        offset += hole_len;
                        clone_root->offset += hole_len;
                        data_offset += hole_len;
                }

                if (key.offset >= clone_root->offset + len)
                        break;

                clone_len = min_t(u64, ext_len, len);

                /*
                 * Clone only when the source still points at the same extent
                 * with the same offset into it; otherwise send the data.
                 */
                if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
                    btrfs_file_extent_offset(leaf, ei) == data_offset)
                        ret = send_clone(sctx, offset, clone_len, clone_root);
                else
                        ret = send_extent_data(sctx, offset, clone_len);

                if (ret < 0)
                        goto out;

                len -= clone_len;
                if (len == 0)
                        break;
                offset += clone_len;
                clone_root->offset += clone_len;
                data_offset += clone_len;
next:
                path->slots[0]++;
        }

        /* Any remainder (e.g. a trailing implicit hole) is sent as data. */
        if (len > 0)
                ret = send_extent_data(sctx, offset, len);
        else
                ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}
5009
/*
 * Send the data of the extent item at @path/@key either via clone_range()
 * (when a clone source was found and the end of the range is block
 * aligned) or as plain write commands.
 */
static int send_write_or_clone(struct send_ctx *sctx,
                               struct btrfs_path *path,
                               struct btrfs_key *key,
                               struct clone_root *clone_root)
{
        int ret = 0;
        struct btrfs_file_extent_item *ei;
        u64 offset = key->offset;
        u64 len;
        u8 type;
        u64 bs = sctx->send_root->fs_info->sb->s_blocksize;

        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
                        struct btrfs_file_extent_item);
        type = btrfs_file_extent_type(path->nodes[0], ei);
        if (type == BTRFS_FILE_EXTENT_INLINE) {
                len = btrfs_file_extent_inline_len(path->nodes[0],
                                                   path->slots[0], ei);
                /*
                 * it is possible the inline item won't cover the whole page,
                 * but there may be items after this page.  Make
                 * sure to send the whole thing
                 */
                len = PAGE_ALIGN(len);
        } else {
                len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
        }

        /* Never send past the current inode size. */
        if (offset + len > sctx->cur_inode_size)
                len = sctx->cur_inode_size - offset;
        if (len == 0) {
                ret = 0;
                goto out;
        }

        if (clone_root && IS_ALIGNED(offset + len, bs)) {
                u64 disk_byte;
                u64 data_offset;

                disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
                data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
                ret = clone_range(sctx, clone_root, disk_byte, data_offset,
                                  offset, len);
        } else {
                ret = send_extent_data(sctx, offset, len);
        }
out:
        return ret;
}
5059
/*
 * Check whether the extent item at @ekey/@left_path of the send root is
 * fully covered by identical extents in the parent root, in which case no
 * data needs to be sent for it.
 *
 * Returns 1 if the extent is unchanged, 0 if it changed (or is of a type
 * we cannot compare, e.g. inline), or a negative errno on failure.
 */
static int is_extent_unchanged(struct send_ctx *sctx,
                               struct btrfs_path *left_path,
                               struct btrfs_key *ekey)
{
        int ret = 0;
        struct btrfs_key key;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
        int slot;
        struct btrfs_key found_key;
        struct btrfs_file_extent_item *ei;
        u64 left_disknr;
        u64 right_disknr;
        u64 left_offset;
        u64 right_offset;
        u64 left_offset_fixed;
        u64 left_len;
        u64 right_len;
        u64 left_gen;
        u64 right_gen;
        u8 left_type;
        u8 right_type;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        eb = left_path->nodes[0];
        slot = left_path->slots[0];
        ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
        left_type = btrfs_file_extent_type(eb, ei);

        /* Only regular extents can be compared this way. */
        if (left_type != BTRFS_FILE_EXTENT_REG) {
                ret = 0;
                goto out;
        }
        left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
        left_len = btrfs_file_extent_num_bytes(eb, ei);
        left_offset = btrfs_file_extent_offset(eb, ei);
        left_gen = btrfs_file_extent_generation(eb, ei);

        /*
         * Following comments will refer to these graphics. L is the left
         * extents which we are checking at the moment. 1-8 are the right
         * extents that we iterate.
         *
         *       |-----L-----|
         * |-1-|-2a-|-3-|-4-|-5-|-6-|
         *
         *       |-----L-----|
         * |--1--|-2b-|...(same as above)
         *
         * Alternative situation. Happens on files where extents got split.
         *       |-----L-----|
         * |-----------7-----------|-6-|
         *
         * Alternative situation. Happens on files which got larger.
         *       |-----L-----|
         * |-8-|
         * Nothing follows after 8.
         */

        key.objectid = ekey->objectid;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = ekey->offset;
        ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        if (ret) {
                ret = 0;
                goto out;
        }

        /*
         * Handle special case where the right side has no extents at all.
         */
        eb = path->nodes[0];
        slot = path->slots[0];
        btrfs_item_key_to_cpu(eb, &found_key, slot);
        if (found_key.objectid != key.objectid ||
            found_key.type != key.type) {
                /* If we're a hole then just pretend nothing changed */
                ret = (left_disknr) ? 0 : 1;
                goto out;
        }

        /*
         * We're now on 2a, 2b or 7.
         */
        key = found_key;
        while (key.offset < ekey->offset + left_len) {
                ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
                right_type = btrfs_file_extent_type(eb, ei);
                if (right_type != BTRFS_FILE_EXTENT_REG) {
                        ret = 0;
                        goto out;
                }

                right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
                right_len = btrfs_file_extent_num_bytes(eb, ei);
                right_offset = btrfs_file_extent_offset(eb, ei);
                right_gen = btrfs_file_extent_generation(eb, ei);

                /*
                 * Are we at extent 8? If yes, we know the extent is changed.
                 * This may only happen on the first iteration.
                 */
                if (found_key.offset + right_len <= ekey->offset) {
                        /* If we're a hole just pretend nothing changed */
                        ret = (left_disknr) ? 0 : 1;
                        goto out;
                }

                left_offset_fixed = left_offset;
                if (key.offset < ekey->offset) {
                        /* Fix the right offset for 2a and 7. */
                        right_offset += ekey->offset - key.offset;
                } else {
                        /* Fix the left offset for all behind 2a and 2b */
                        left_offset_fixed += key.offset - ekey->offset;
                }

                /*
                 * Check if we have the same extent.
                 */
                if (left_disknr != right_disknr ||
                    left_offset_fixed != right_offset ||
                    left_gen != right_gen) {
                        ret = 0;
                        goto out;
                }

                /*
                 * Go to the next extent.
                 */
                ret = btrfs_next_item(sctx->parent_root, path);
                if (ret < 0)
                        goto out;
                if (!ret) {
                        eb = path->nodes[0];
                        slot = path->slots[0];
                        btrfs_item_key_to_cpu(eb, &found_key, slot);
                }
                if (ret || found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        key.offset += right_len;
                        break;
                }
                /* A gap between right extents means the range changed. */
                if (found_key.offset != key.offset + right_len) {
                        ret = 0;
                        goto out;
                }
                key = found_key;
        }

        /*
         * We're now behind the left extent (treat as unchanged) or at the end
         * of the right side (treat as changed).
         */
        if (key.offset >= ekey->offset + left_len)
                ret = 1;
        else
                ret = 0;


out:
        btrfs_free_path(path);
        return ret;
}
5229
/*
 * Look up the file extent item of the current inode (sctx->cur_ino) at or
 * before @offset in the send root and record where that extent ends in
 * sctx->cur_inode_last_extent.  The hole detection code uses this value to
 * know up to where file data has already been accounted for.
 *
 * If the inode has no extent item at/before @offset,
 * sctx->cur_inode_last_extent is left at 0.  Returns 0 on success or a
 * negative errno.
 */
static int get_last_extent(struct send_ctx *sctx, u64 offset)
{
	struct btrfs_path *path;
	struct btrfs_root *root = sctx->send_root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;
	u8 type;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	sctx->cur_inode_last_extent = 0;

	/* Find the last extent data item at or before @offset. */
	key.objectid = sctx->cur_ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;
	ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	ret = 0;
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	/* Landed on some other inode or item type: nothing to record. */
	if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
		goto out;

	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], fi);
	if (type == BTRFS_FILE_EXTENT_INLINE) {
		/*
		 * Inline extents have no num_bytes field; their end is the
		 * inline data length rounded up to the sector size.
		 */
		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
							path->slots[0], fi);
		extent_end = ALIGN(key.offset + size,
				   sctx->send_root->sectorsize);
	} else {
		extent_end = key.offset +
			btrfs_file_extent_num_bytes(path->nodes[0], fi);
	}
	sctx->cur_inode_last_extent = extent_end;
out:
	btrfs_free_path(path);
	return ret;
}
5274
/*
 * If there is a gap between the end of the previously processed extent and
 * the start of the extent item at @key, emit a hole for the gap via
 * send_hole(), then advance sctx->cur_inode_last_extent past the current
 * extent.  No-op unless @key belongs to the current inode and the stream
 * flags require explicit holes (need_send_hole()).
 */
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
			   struct btrfs_key *key)
{
	struct btrfs_file_extent_item *fi;
	u64 extent_end;
	u8 type;
	int ret = 0;

	if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
		return 0;

	/*
	 * cur_inode_last_extent is initialized lazily ((u64)-1 means "not
	 * known yet"); look up the extent preceding @key->offset on demand.
	 */
	if (sctx->cur_inode_last_extent == (u64)-1) {
		ret = get_last_extent(sctx, key->offset - 1);
		if (ret)
			return ret;
	}

	/* Compute where the extent item we are looking at ends. */
	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], fi);
	if (type == BTRFS_FILE_EXTENT_INLINE) {
		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
							path->slots[0], fi);
		extent_end = ALIGN(key->offset + size,
				   sctx->send_root->sectorsize);
	} else {
		extent_end = key->offset +
			btrfs_file_extent_num_bytes(path->nodes[0], fi);
	}

	if (path->slots[0] == 0 &&
	    sctx->cur_inode_last_extent < key->offset) {
		/*
		 * We might have skipped entire leafs that contained only
		 * file extent items for our current inode. These leafs have
		 * a generation number smaller (older) than the one in the
		 * current leaf and the leaf our last extent came from, and
		 * are located between these 2 leafs.
		 */
		ret = get_last_extent(sctx, key->offset - 1);
		if (ret)
			return ret;
	}

	/* Any remaining gap before @key->offset is a hole to be sent. */
	if (sctx->cur_inode_last_extent < key->offset)
		ret = send_hole(sctx, key->offset);
	sctx->cur_inode_last_extent = extent_end;
	return ret;
}
5324
/*
 * Process one file extent item of the current inode: skip it if it is
 * unchanged relative to the parent snapshot (incremental send), a prealloc
 * extent or a hole, otherwise emit write or clone commands for it.  In
 * either case a preceding hole may be emitted via maybe_send_hole().
 */
static int process_extent(struct send_ctx *sctx,
			  struct btrfs_path *path,
			  struct btrfs_key *key)
{
	struct clone_root *found_clone = NULL;
	int ret = 0;

	/* Symlink data is sent as the link target, never as extents. */
	if (S_ISLNK(sctx->cur_inode_mode))
		return 0;

	if (sctx->parent_root && !sctx->cur_inode_new) {
		ret = is_extent_unchanged(sctx, path, key);
		if (ret < 0)
			goto out;
		if (ret) {
			/*
			 * Unchanged extent: nothing to write, but still
			 * account for it in the hole tracking (out_hole).
			 */
			ret = 0;
			goto out_hole;
		}
	} else {
		struct btrfs_file_extent_item *ei;
		u8 type;

		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		type = btrfs_file_extent_type(path->nodes[0], ei);
		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
		    type == BTRFS_FILE_EXTENT_REG) {
			/*
			 * The send spec does not have a prealloc command yet,
			 * so just leave a hole for prealloc'ed extents until
			 * we have enough commands queued up to justify rev'ing
			 * the send spec.
			 */
			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
				ret = 0;
				goto out;
			}

			/* Have a hole, just skip it. */
			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
				ret = 0;
				goto out;
			}
		}
	}

	/* Try to find a clone source; -ENOENT just means "no clone found". */
	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
			sctx->cur_inode_size, &found_clone);
	if (ret != -ENOENT && ret < 0)
		goto out;

	ret = send_write_or_clone(sctx, path, key, found_clone);
	if (ret)
		goto out;
out_hole:
	ret = maybe_send_hole(sctx, path, key);
out:
	return ret;
}
5384
5385static int process_all_extents(struct send_ctx *sctx)
5386{
5387        int ret;
5388        struct btrfs_root *root;
5389        struct btrfs_path *path;
5390        struct btrfs_key key;
5391        struct btrfs_key found_key;
5392        struct extent_buffer *eb;
5393        int slot;
5394
5395        root = sctx->send_root;
5396        path = alloc_path_for_send();
5397        if (!path)
5398                return -ENOMEM;
5399
5400        key.objectid = sctx->cmp_key->objectid;
5401        key.type = BTRFS_EXTENT_DATA_KEY;
5402        key.offset = 0;
5403        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5404        if (ret < 0)
5405                goto out;
5406
5407        while (1) {
5408                eb = path->nodes[0];
5409                slot = path->slots[0];
5410
5411                if (slot >= btrfs_header_nritems(eb)) {
5412                        ret = btrfs_next_leaf(root, path);
5413                        if (ret < 0) {
5414                                goto out;
5415                        } else if (ret > 0) {
5416                                ret = 0;
5417                                break;
5418                        }
5419                        continue;
5420                }
5421
5422                btrfs_item_key_to_cpu(eb, &found_key, slot);
5423
5424                if (found_key.objectid != key.objectid ||
5425                    found_key.type != key.type) {
5426                        ret = 0;
5427                        goto out;
5428                }
5429
5430                ret = process_extent(sctx, path, &found_key);
5431                if (ret < 0)
5432                        goto out;
5433
5434                path->slots[0]++;
5435        }
5436
5437out:
5438        btrfs_free_path(path);
5439        return ret;
5440}
5441
5442static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
5443                                           int *pending_move,
5444                                           int *refs_processed)
5445{
5446        int ret = 0;
5447
5448        if (sctx->cur_ino == 0)
5449                goto out;
5450        if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
5451            sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
5452                goto out;
5453        if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
5454                goto out;
5455
5456        ret = process_recorded_refs(sctx, pending_move);
5457        if (ret < 0)
5458                goto out;
5459
5460        *refs_processed = 1;
5461out:
5462        return ret;
5463}
5464
/*
 * Emit the commands that conclude processing of the current inode: flush
 * recorded refs, send a trailing hole and truncate for regular files, send
 * chown/chmod where owner or mode differ from the parent snapshot, apply
 * child directory moves that waited on this inode and finally send utimes.
 * Called with at_end == 0 whenever the tree comparison moves on to another
 * item, and with at_end == 1 once the whole comparison is finished.
 */
static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
{
	int ret = 0;
	u64 left_mode;
	u64 left_uid;
	u64 left_gid;
	u64 right_mode;
	u64 right_uid;
	u64 right_gid;
	int need_chmod = 0;
	int need_chown = 0;
	int pending_move = 0;
	int refs_processed = 0;

	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
					      &refs_processed);
	if (ret < 0)
		goto out;

	/*
	 * We have processed the refs and thus need to advance send_progress.
	 * Now, calls to get_cur_xxx will take the updated refs of the current
	 * inode into account.
	 *
	 * On the other hand, if our current inode is a directory and couldn't
	 * be moved/renamed because its parent was renamed/moved too and it has
	 * a higher inode number, we can only move/rename our current inode
	 * after we moved/renamed its parent. Therefore in this case operate on
	 * the old path (pre move/rename) of our current inode, and the
	 * move/rename will be performed later.
	 */
	if (refs_processed && !pending_move)
		sctx->send_progress = sctx->cur_ino + 1;

	/* Nothing to finish for no inode or a deleted one. */
	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
		goto out;
	/* More items of this inode may still follow; finish later. */
	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
		goto out;

	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
			&left_mode, &left_uid, &left_gid, NULL);
	if (ret < 0)
		goto out;

	if (!sctx->parent_root || sctx->cur_inode_new) {
		/* New inodes always get chown (and chmod unless symlink). */
		need_chown = 1;
		if (!S_ISLNK(sctx->cur_inode_mode))
			need_chmod = 1;
	} else {
		/* Compare against the parent snapshot's owner and mode. */
		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
				NULL, NULL, &right_mode, &right_uid,
				&right_gid, NULL);
		if (ret < 0)
			goto out;

		if (left_uid != right_uid || left_gid != right_gid)
			need_chown = 1;
		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
			need_chmod = 1;
	}

	if (S_ISREG(sctx->cur_inode_mode)) {
		if (need_send_hole(sctx)) {
			/*
			 * Make sure the last extent end is known, then send
			 * a hole for any gap up to the inode's size.
			 */
			if (sctx->cur_inode_last_extent == (u64)-1 ||
			    sctx->cur_inode_last_extent <
			    sctx->cur_inode_size) {
				ret = get_last_extent(sctx, (u64)-1);
				if (ret)
					goto out;
			}
			if (sctx->cur_inode_last_extent <
			    sctx->cur_inode_size) {
				ret = send_hole(sctx, sctx->cur_inode_size);
				if (ret)
					goto out;
			}
		}
		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				sctx->cur_inode_size);
		if (ret < 0)
			goto out;
	}

	if (need_chown) {
		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				left_uid, left_gid);
		if (ret < 0)
			goto out;
	}
	if (need_chmod) {
		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				left_mode);
		if (ret < 0)
			goto out;
	}

	/*
	 * If other directory inodes depended on our current directory
	 * inode's move/rename, now do their move/rename operations.
	 */
	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
		ret = apply_children_dir_moves(sctx);
		if (ret)
			goto out;
		/*
		 * Need to send that every time, no matter if it actually
		 * changed between the two trees as we have done changes to
		 * the inode before. If our inode is a directory and it's
		 * waiting to be moved/renamed, we will send its utimes when
		 * it's moved/renamed, therefore we don't need to do it here.
		 */
		sctx->send_progress = sctx->cur_ino + 1;
		ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
	}

out:
	return ret;
}
5585
/*
 * Handle a new/deleted/changed inode item reported by the tree comparison.
 * Initializes the per-inode state in sctx (cur_ino, generation, size, mode,
 * rdev, ...) that the following ref/xattr/extent callbacks rely on.  The
 * special "generation changed" case (the inode was deleted and its number
 * reused) is handled here in full: the old inode's refs are processed as
 * deleted and the new inode's refs, extents and xattrs as new.
 */
static int changed_inode(struct send_ctx *sctx,
			 enum btrfs_compare_tree_result result)
{
	int ret = 0;
	struct btrfs_key *key = sctx->cmp_key;
	struct btrfs_inode_item *left_ii = NULL;
	struct btrfs_inode_item *right_ii = NULL;
	u64 left_gen = 0;
	u64 right_gen = 0;

	sctx->cur_ino = key->objectid;
	sctx->cur_inode_new_gen = 0;
	/* (u64)-1 == "last extent end not determined yet", see maybe_send_hole. */
	sctx->cur_inode_last_extent = (u64)-1;

	/*
	 * Set send_progress to current inode. This will tell all get_cur_xxx
	 * functions that the current inode's refs are not updated yet. Later,
	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
	 */
	sctx->send_progress = sctx->cur_ino;

	if (result == BTRFS_COMPARE_TREE_NEW ||
	    result == BTRFS_COMPARE_TREE_CHANGED) {
		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
				sctx->left_path->slots[0],
				struct btrfs_inode_item);
		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
				left_ii);
	} else {
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				sctx->right_path->slots[0],
				struct btrfs_inode_item);
		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				right_ii);
	}
	if (result == BTRFS_COMPARE_TREE_CHANGED) {
		/* For CHANGED we have both sides; fetch the right one too. */
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				sctx->right_path->slots[0],
				struct btrfs_inode_item);

		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				right_ii);

		/*
		 * The cur_ino = root dir case is special here. We can't treat
		 * the inode as deleted+reused because it would generate a
		 * stream that tries to delete/mkdir the root dir.
		 */
		if (left_gen != right_gen &&
		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			sctx->cur_inode_new_gen = 1;
	}

	if (result == BTRFS_COMPARE_TREE_NEW) {
		sctx->cur_inode_gen = left_gen;
		sctx->cur_inode_new = 1;
		sctx->cur_inode_deleted = 0;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_rdev = btrfs_inode_rdev(
				sctx->left_path->nodes[0], left_ii);
		/* The root dir always exists on the receiver; never create it. */
		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			ret = send_create_inode_if_needed(sctx);
	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
		sctx->cur_inode_gen = right_gen;
		sctx->cur_inode_new = 0;
		sctx->cur_inode_deleted = 1;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->right_path->nodes[0], right_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->right_path->nodes[0], right_ii);
	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
		/*
		 * We need to do some special handling in case the inode was
		 * reported as changed with a changed generation number. This
		 * means that the original inode was deleted and new inode
		 * reused the same inum. So we have to treat the old inode as
		 * deleted and the new one as new.
		 */
		if (sctx->cur_inode_new_gen) {
			/*
			 * First, process the inode as if it was deleted.
			 */
			sctx->cur_inode_gen = right_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_deleted = 1;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->right_path->nodes[0], right_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->right_path->nodes[0], right_ii);
			ret = process_all_refs(sctx,
					BTRFS_COMPARE_TREE_DELETED);
			if (ret < 0)
				goto out;

			/*
			 * Now process the inode as if it was new.
			 */
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 1;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_rdev = btrfs_inode_rdev(
					sctx->left_path->nodes[0], left_ii);
			ret = send_create_inode_if_needed(sctx);
			if (ret < 0)
				goto out;

			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
			if (ret < 0)
				goto out;
			/*
			 * Advance send_progress now as we did not get into
			 * process_recorded_refs_if_needed in the new_gen case.
			 */
			sctx->send_progress = sctx->cur_ino + 1;

			/*
			 * Now process all extents and xattrs of the inode as if
			 * they were all new.
			 */
			ret = process_all_extents(sctx);
			if (ret < 0)
				goto out;
			ret = process_all_new_xattrs(sctx);
			if (ret < 0)
				goto out;
		} else {
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_new_gen = 0;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
		}
	}

out:
	return ret;
}
5733
5734/*
5735 * We have to process new refs before deleted refs, but compare_trees gives us
5736 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
5737 * first and later process them in process_recorded_refs.
5738 * For the cur_inode_new_gen case, we skip recording completely because
5739 * changed_inode did already initiate processing of refs. The reason for this is
5740 * that in this case, compare_tree actually compares the refs of 2 different
5741 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
5742 * refs of the right tree as deleted and all refs of the left tree as new.
5743 */
5744static int changed_ref(struct send_ctx *sctx,
5745                       enum btrfs_compare_tree_result result)
5746{
5747        int ret = 0;
5748
5749        if (sctx->cur_ino != sctx->cmp_key->objectid) {
5750                inconsistent_snapshot_error(sctx, result, "reference");
5751                return -EIO;
5752        }
5753
5754        if (!sctx->cur_inode_new_gen &&
5755            sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
5756                if (result == BTRFS_COMPARE_TREE_NEW)
5757                        ret = record_new_ref(sctx);
5758                else if (result == BTRFS_COMPARE_TREE_DELETED)
5759                        ret = record_deleted_ref(sctx);
5760                else if (result == BTRFS_COMPARE_TREE_CHANGED)
5761                        ret = record_changed_ref(sctx);
5762        }
5763
5764        return ret;
5765}
5766
5767/*
5768 * Process new/deleted/changed xattrs. We skip processing in the
5769 * cur_inode_new_gen case because changed_inode did already initiate processing
5770 * of xattrs. The reason is the same as in changed_ref
5771 */
5772static int changed_xattr(struct send_ctx *sctx,
5773                         enum btrfs_compare_tree_result result)
5774{
5775        int ret = 0;
5776
5777        if (sctx->cur_ino != sctx->cmp_key->objectid) {
5778                inconsistent_snapshot_error(sctx, result, "xattr");
5779                return -EIO;
5780        }
5781
5782        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5783                if (result == BTRFS_COMPARE_TREE_NEW)
5784                        ret = process_new_xattr(sctx);
5785                else if (result == BTRFS_COMPARE_TREE_DELETED)
5786                        ret = process_deleted_xattr(sctx);
5787                else if (result == BTRFS_COMPARE_TREE_CHANGED)
5788                        ret = process_changed_xattr(sctx);
5789        }
5790
5791        return ret;
5792}
5793
5794/*
5795 * Process new/deleted/changed extents. We skip processing in the
5796 * cur_inode_new_gen case because changed_inode did already initiate processing
5797 * of extents. The reason is the same as in changed_ref
5798 */
5799static int changed_extent(struct send_ctx *sctx,
5800                          enum btrfs_compare_tree_result result)
5801{
5802        int ret = 0;
5803
5804        if (sctx->cur_ino != sctx->cmp_key->objectid) {
5805                inconsistent_snapshot_error(sctx, result, "extent");
5806                return -EIO;
5807        }
5808
5809        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5810                if (result != BTRFS_COMPARE_TREE_DELETED)
5811                        ret = process_extent(sctx, sctx->left_path,
5812                                        sctx->cmp_key);
5813        }
5814
5815        return ret;
5816}
5817
5818static int dir_changed(struct send_ctx *sctx, u64 dir)
5819{
5820        u64 orig_gen, new_gen;
5821        int ret;
5822
5823        ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
5824                             NULL, NULL);
5825        if (ret)
5826                return ret;
5827
5828        ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
5829                             NULL, NULL, NULL);
5830        if (ret)
5831                return ret;
5832
5833        return (orig_gen != new_gen) ? 1 : 0;
5834}
5835
/*
 * For a ref item that is byte-identical in both trees, check whether any
 * parent directory it points to changed generation between the snapshots
 * (deleted and recreated with the same inode number).  In that case the ref
 * must still be processed even though the item compared equal.  Returns the
 * dir_changed() result: 1 if some parent dir changed, 0 if none did,
 * negative errno on error.
 */
static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
			struct btrfs_key *key)
{
	struct btrfs_inode_extref *extref;
	struct extent_buffer *leaf;
	u64 dirid = 0, last_dirid = 0;
	unsigned long ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int ref_name_len;
	int ret = 0;

	/* Easy case, just check this one dirid */
	if (key->type == BTRFS_INODE_REF_KEY) {
		dirid = key->offset;

		ret = dir_changed(sctx, dirid);
		goto out;
	}

	/*
	 * INODE_EXTREF items pack multiple (parent, name) entries into one
	 * item; walk them and check each distinct parent dirid.
	 */
	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	while (cur_offset < item_size) {
		extref = (struct btrfs_inode_extref *)(ptr +
						       cur_offset);
		dirid = btrfs_inode_extref_parent(leaf, extref);
		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
		/* Advance before the continue below so the walk progresses. */
		cur_offset += ref_name_len + sizeof(*extref);
		/* Consecutive entries often share a parent; skip repeats. */
		if (dirid == last_dirid)
			continue;
		ret = dir_changed(sctx, dirid);
		if (ret)
			break;
		last_dirid = dirid;
	}
out:
	return ret;
}
5875
5876/*
5877 * Updates compare related fields in sctx and simply forwards to the actual
5878 * changed_xxx functions.
5879 */
/*
 * Updates compare related fields in sctx and simply forwards to the actual
 * changed_xxx functions.
 *
 * Callback invoked by btrfs_compare_trees (and by full_send_tree with a
 * NULL right side and BTRFS_COMPARE_TREE_NEW).  Items reported as
 * TREE_SAME normally need no processing; the exceptions are refs whose
 * parent directory changed generation (re-dispatched as CHANGED) and
 * extent data items, which may still require sending a hole.
 */
static int changed_cb(struct btrfs_root *left_root,
		      struct btrfs_root *right_root,
		      struct btrfs_path *left_path,
		      struct btrfs_path *right_path,
		      struct btrfs_key *key,
		      enum btrfs_compare_tree_result result,
		      void *ctx)
{
	int ret = 0;
	struct send_ctx *sctx = ctx;

	if (result == BTRFS_COMPARE_TREE_SAME) {
		if (key->type == BTRFS_INODE_REF_KEY ||
		    key->type == BTRFS_INODE_EXTREF_KEY) {
			ret = compare_refs(sctx, left_path, key);
			if (!ret)
				return 0;
			if (ret < 0)
				return ret;
		} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
			return maybe_send_hole(sctx, left_path, key);
		} else {
			return 0;
		}
		/* A parent dir changed: treat this ref as changed after all. */
		result = BTRFS_COMPARE_TREE_CHANGED;
		ret = 0;
	}

	sctx->left_path = left_path;
	sctx->right_path = right_path;
	sctx->cmp_key = key;

	/* Moving to a new item may conclude the previous inode. */
	ret = finish_inode_if_needed(sctx, 0);
	if (ret < 0)
		goto out;

	/* Ignore non-FS objects */
	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
		goto out;

	if (key->type == BTRFS_INODE_ITEM_KEY)
		ret = changed_inode(sctx, result);
	else if (key->type == BTRFS_INODE_REF_KEY ||
		 key->type == BTRFS_INODE_EXTREF_KEY)
		ret = changed_ref(sctx, result);
	else if (key->type == BTRFS_XATTR_ITEM_KEY)
		ret = changed_xattr(sctx, result);
	else if (key->type == BTRFS_EXTENT_DATA_KEY)
		ret = changed_extent(sctx, result);

out:
	return ret;
}
5934
5935static int full_send_tree(struct send_ctx *sctx)
5936{
5937        int ret;
5938        struct btrfs_root *send_root = sctx->send_root;
5939        struct btrfs_key key;
5940        struct btrfs_key found_key;
5941        struct btrfs_path *path;
5942        struct extent_buffer *eb;
5943        int slot;
5944
5945        path = alloc_path_for_send();
5946        if (!path)
5947                return -ENOMEM;
5948
5949        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5950        key.type = BTRFS_INODE_ITEM_KEY;
5951        key.offset = 0;
5952
5953        ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
5954        if (ret < 0)
5955                goto out;
5956        if (ret)
5957                goto out_finish;
5958
5959        while (1) {
5960                eb = path->nodes[0];
5961                slot = path->slots[0];
5962                btrfs_item_key_to_cpu(eb, &found_key, slot);
5963
5964                ret = changed_cb(send_root, NULL, path, NULL,
5965                                &found_key, BTRFS_COMPARE_TREE_NEW, sctx);
5966                if (ret < 0)
5967                        goto out;
5968
5969                key.objectid = found_key.objectid;
5970                key.type = found_key.type;
5971                key.offset = found_key.offset + 1;
5972
5973                ret = btrfs_next_item(send_root, path);
5974                if (ret < 0)
5975                        goto out;
5976                if (ret) {
5977                        ret  = 0;
5978                        break;
5979                }
5980        }
5981
5982out_finish:
5983        ret = finish_inode_if_needed(sctx, 1);
5984
5985out:
5986        btrfs_free_path(path);
5987        return ret;
5988}
5989
5990static int send_subvol(struct send_ctx *sctx)
5991{
5992        int ret;
5993
5994        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
5995                ret = send_header(sctx);
5996                if (ret < 0)
5997                        goto out;
5998        }
5999
6000        ret = send_subvol_begin(sctx);
6001        if (ret < 0)
6002                goto out;
6003
6004        if (sctx->parent_root) {
6005                ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
6006                                changed_cb, sctx);
6007                if (ret < 0)
6008                        goto out;
6009                ret = finish_inode_if_needed(sctx, 1);
6010                if (ret < 0)
6011                        goto out;
6012        } else {
6013                ret = full_send_tree(sctx);
6014                if (ret < 0)
6015                        goto out;
6016        }
6017
6018out:
6019        free_recorded_refs(sctx);
6020        return ret;
6021}
6022
6023/*
6024 * If orphan cleanup did remove any orphans from a root, it means the tree
6025 * was modified and therefore the commit root is not the same as the current
6026 * root anymore. This is a problem, because send uses the commit root and
6027 * therefore can see inode items that don't exist in the current root anymore,
6028 * and for example make calls to btrfs_iget, which will do tree lookups based
6029 * on the current root and not on the commit root. Those lookups will fail,
6030 * returning a -ESTALE error, and making send fail with that error. So make
6031 * sure a send does not see any orphans we have just removed, and that it will
6032 * see the same inodes regardless of whether a transaction commit happened
6033 * before it started (meaning that the commit root will be the same as the
6034 * current root) or not.
6035 */
6036static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
6037{
6038        int i;
6039        struct btrfs_trans_handle *trans = NULL;
6040
6041again:
6042        if (sctx->parent_root &&
6043            sctx->parent_root->node != sctx->parent_root->commit_root)
6044                goto commit_trans;
6045
6046        for (i = 0; i < sctx->clone_roots_cnt; i++)
6047                if (sctx->clone_roots[i].root->node !=
6048                    sctx->clone_roots[i].root->commit_root)
6049                        goto commit_trans;
6050
6051        if (trans)
6052                return btrfs_end_transaction(trans, sctx->send_root);
6053
6054        return 0;
6055
6056commit_trans:
6057        /* Use any root, all fs roots will get their commit roots updated. */
6058        if (!trans) {
6059                trans = btrfs_join_transaction(sctx->send_root);
6060                if (IS_ERR(trans))
6061                        return PTR_ERR(trans);
6062                goto again;
6063        }
6064
6065        return btrfs_commit_transaction(trans, sctx->send_root);
6066}
6067
6068static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
6069{
6070        spin_lock(&root->root_item_lock);
6071        root->send_in_progress--;
6072        /*
6073         * Not much left to do, we don't know why it's unbalanced and
6074         * can't blindly reset it to 0.
6075         */
6076        if (root->send_in_progress < 0)
6077                btrfs_err(root->fs_info,
6078                        "send_in_progres unbalanced %d root %llu",
6079                        root->send_in_progress, root->root_key.objectid);
6080        spin_unlock(&root->root_item_lock);
6081}
6082
6083long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
6084{
6085        int ret = 0;
6086        struct btrfs_root *send_root;
6087        struct btrfs_root *clone_root;
6088        struct btrfs_fs_info *fs_info;
6089        struct btrfs_ioctl_send_args *arg = NULL;
6090        struct btrfs_key key;
6091        struct send_ctx *sctx = NULL;
6092        u32 i;
6093        u64 *clone_sources_tmp = NULL;
6094        int clone_sources_to_rollback = 0;
6095        unsigned alloc_size;
6096        int sort_clone_roots = 0;
6097        int index;
6098
6099        if (!capable(CAP_SYS_ADMIN))
6100                return -EPERM;
6101
6102        send_root = BTRFS_I(file_inode(mnt_file))->root;
6103        fs_info = send_root->fs_info;
6104
6105        /*
6106         * The subvolume must remain read-only during send, protect against
6107         * making it RW. This also protects against deletion.
6108         */
6109        spin_lock(&send_root->root_item_lock);
6110        send_root->send_in_progress++;
6111        spin_unlock(&send_root->root_item_lock);
6112
6113        /*
6114         * This is done when we lookup the root, it should already be complete
6115         * by the time we get here.
6116         */
6117        WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
6118
6119        /*
6120         * Userspace tools do the checks and warn the user if it's
6121         * not RO.
6122         */
6123        if (!btrfs_root_readonly(send_root)) {
6124                ret = -EPERM;
6125                goto out;
6126        }
6127
6128        arg = memdup_user(arg_, sizeof(*arg));
6129        if (IS_ERR(arg)) {
6130                ret = PTR_ERR(arg);
6131                arg = NULL;
6132                goto out;
6133        }
6134
6135        if (arg->clone_sources_count >
6136            ULLONG_MAX / sizeof(*arg->clone_sources)) {
6137                ret = -EINVAL;
6138                goto out;
6139        }
6140
6141        if (!access_ok(VERIFY_READ, arg->clone_sources,
6142                        sizeof(*arg->clone_sources) *
6143                        arg->clone_sources_count)) {
6144                ret = -EFAULT;
6145                goto out;
6146        }
6147
6148        if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
6149                ret = -EINVAL;
6150                goto out;
6151        }
6152
6153        sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
6154        if (!sctx) {
6155                ret = -ENOMEM;
6156                goto out;
6157        }
6158
6159        INIT_LIST_HEAD(&sctx->new_refs);
6160        INIT_LIST_HEAD(&sctx->deleted_refs);
6161        INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
6162        INIT_LIST_HEAD(&sctx->name_cache_list);
6163
6164        sctx->flags = arg->flags;
6165
6166        sctx->send_filp = fget(arg->send_fd);
6167        if (!sctx->send_filp) {
6168                ret = -EBADF;
6169                goto out;
6170        }
6171
6172        sctx->send_root = send_root;
6173        /*
6174         * Unlikely but possible, if the subvolume is marked for deletion but
6175         * is slow to remove the directory entry, send can still be started
6176         */
6177        if (btrfs_root_dead(sctx->send_root)) {
6178                ret = -EPERM;
6179                goto out;
6180        }
6181
6182        sctx->clone_roots_cnt = arg->clone_sources_count;
6183
6184        sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
6185        sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN);
6186        if (!sctx->send_buf) {
6187                sctx->send_buf = vmalloc(sctx->send_max_size);
6188                if (!sctx->send_buf) {
6189                        ret = -ENOMEM;
6190                        goto out;
6191                }
6192        }
6193
6194        sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN);
6195        if (!sctx->read_buf) {
6196                sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
6197                if (!sctx->read_buf) {
6198                        ret = -ENOMEM;
6199                        goto out;
6200                }
6201        }
6202
6203        sctx->pending_dir_moves = RB_ROOT;
6204        sctx->waiting_dir_moves = RB_ROOT;
6205        sctx->orphan_dirs = RB_ROOT;
6206
6207        alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
6208
6209        sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
6210        if (!sctx->clone_roots) {
6211                sctx->clone_roots = vzalloc(alloc_size);
6212                if (!sctx->clone_roots) {
6213                        ret = -ENOMEM;
6214                        goto out;
6215                }
6216        }
6217
6218        alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
6219
6220        if (arg->clone_sources_count) {
6221                clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
6222                if (!clone_sources_tmp) {
6223                        clone_sources_tmp = vmalloc(alloc_size);
6224                        if (!clone_sources_tmp) {
6225                                ret = -ENOMEM;
6226                                goto out;
6227                        }
6228                }
6229
6230                ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
6231                                alloc_size);
6232                if (ret) {
6233                        ret = -EFAULT;
6234                        goto out;
6235                }
6236
6237                for (i = 0; i < arg->clone_sources_count; i++) {
6238                        key.objectid = clone_sources_tmp[i];
6239                        key.type = BTRFS_ROOT_ITEM_KEY;
6240                        key.offset = (u64)-1;
6241
6242                        index = srcu_read_lock(&fs_info->subvol_srcu);
6243
6244                        clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
6245                        if (IS_ERR(clone_root)) {
6246                                srcu_read_unlock(&fs_info->subvol_srcu, index);
6247                                ret = PTR_ERR(clone_root);
6248                                goto out;
6249                        }
6250                        spin_lock(&clone_root->root_item_lock);
6251                        if (!btrfs_root_readonly(clone_root) ||
6252                            btrfs_root_dead(clone_root)) {
6253                                spin_unlock(&clone_root->root_item_lock);
6254                                srcu_read_unlock(&fs_info->subvol_srcu, index);
6255                                ret = -EPERM;
6256                                goto out;
6257                        }
6258                        clone_root->send_in_progress++;
6259                        spin_unlock(&clone_root->root_item_lock);
6260                        srcu_read_unlock(&fs_info->subvol_srcu, index);
6261
6262                        sctx->clone_roots[i].root = clone_root;
6263                        clone_sources_to_rollback = i + 1;
6264                }
6265                kvfree(clone_sources_tmp);
6266                clone_sources_tmp = NULL;
6267        }
6268
6269        if (arg->parent_root) {
6270                key.objectid = arg->parent_root;
6271                key.type = BTRFS_ROOT_ITEM_KEY;
6272                key.offset = (u64)-1;
6273
6274                index = srcu_read_lock(&fs_info->subvol_srcu);
6275
6276                sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
6277                if (IS_ERR(sctx->parent_root)) {
6278                        srcu_read_unlock(&fs_info->subvol_srcu, index);
6279                        ret = PTR_ERR(sctx->parent_root);
6280                        goto out;
6281                }
6282
6283                spin_lock(&sctx->parent_root->root_item_lock);
6284                sctx->parent_root->send_in_progress++;
6285                if (!btrfs_root_readonly(sctx->parent_root) ||
6286                                btrfs_root_dead(sctx->parent_root)) {
6287                        spin_unlock(&sctx->parent_root->root_item_lock);
6288                        srcu_read_unlock(&fs_info->subvol_srcu, index);
6289                        ret = -EPERM;
6290                        goto out;
6291                }
6292                spin_unlock(&sctx->parent_root->root_item_lock);
6293
6294                srcu_read_unlock(&fs_info->subvol_srcu, index);
6295        }
6296
6297        /*
6298         * Clones from send_root are allowed, but only if the clone source
6299         * is behind the current send position. This is checked while searching
6300         * for possible clone sources.
6301         */
6302        sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
6303
6304        /* We do a bsearch later */
6305        sort(sctx->clone_roots, sctx->clone_roots_cnt,
6306                        sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
6307                        NULL);
6308        sort_clone_roots = 1;
6309
6310        ret = ensure_commit_roots_uptodate(sctx);
6311        if (ret)
6312                goto out;
6313
6314        current->journal_info = BTRFS_SEND_TRANS_STUB;
6315        ret = send_subvol(sctx);
6316        current->journal_info = NULL;
6317        if (ret < 0)
6318                goto out;
6319
6320        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
6321                ret = begin_cmd(sctx, BTRFS_SEND_C_END);
6322                if (ret < 0)
6323                        goto out;
6324                ret = send_cmd(sctx);
6325                if (ret < 0)
6326                        goto out;
6327        }
6328
6329out:
6330        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
6331        while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
6332                struct rb_node *n;
6333                struct pending_dir_move *pm;
6334
6335                n = rb_first(&sctx->pending_dir_moves);
6336                pm = rb_entry(n, struct pending_dir_move, node);
6337                while (!list_empty(&pm->list)) {
6338                        struct pending_dir_move *pm2;
6339
6340                        pm2 = list_first_entry(&pm->list,
6341                                               struct pending_dir_move, list);
6342                        free_pending_move(sctx, pm2);
6343                }
6344                free_pending_move(sctx, pm);
6345        }
6346
6347        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
6348        while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
6349                struct rb_node *n;
6350                struct waiting_dir_move *dm;
6351
6352                n = rb_first(&sctx->waiting_dir_moves);
6353                dm = rb_entry(n, struct waiting_dir_move, node);
6354                rb_erase(&dm->node, &sctx->waiting_dir_moves);
6355                kfree(dm);
6356        }
6357
6358        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
6359        while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
6360                struct rb_node *n;
6361                struct orphan_dir_info *odi;
6362
6363                n = rb_first(&sctx->orphan_dirs);
6364                odi = rb_entry(n, struct orphan_dir_info, node);
6365                free_orphan_dir_info(sctx, odi);
6366        }
6367
6368        if (sort_clone_roots) {
6369                for (i = 0; i < sctx->clone_roots_cnt; i++)
6370                        btrfs_root_dec_send_in_progress(
6371                                        sctx->clone_roots[i].root);
6372        } else {
6373                for (i = 0; sctx && i < clone_sources_to_rollback; i++)
6374                        btrfs_root_dec_send_in_progress(
6375                                        sctx->clone_roots[i].root);
6376
6377                btrfs_root_dec_send_in_progress(send_root);
6378        }
6379        if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
6380                btrfs_root_dec_send_in_progress(sctx->parent_root);
6381
6382        kfree(arg);
6383        kvfree(clone_sources_tmp);
6384
6385        if (sctx) {
6386                if (sctx->send_filp)
6387                        fput(sctx->send_filp);
6388
6389                kvfree(sctx->clone_roots);
6390                kvfree(sctx->send_buf);
6391                kvfree(sctx->read_buf);
6392
6393                name_cache_free(sctx);
6394
6395                kfree(sctx);
6396        }
6397
6398        return ret;
6399}
6400